From 3c161384cc215ea2acdf62046b13841e28119b5d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 4 Mar 2023 17:49:48 +0100 Subject: [PATCH 01/29] cos x GTOs integ added --- src/ao_basis/aos_in_r.irp.f | 26 +- src/ao_one_e_ints/NEED | 1 + src/ao_one_e_ints/ao_overlap.irp.f | 200 ++- src/ao_one_e_ints/kin_ao_ints.irp.f | 206 ++- src/ao_one_e_ints/pot_ao_erf_ints.irp.f | 531 +++--- src/ao_one_e_ints/pot_ao_ints.irp.f | 179 +- src/ao_two_e_ints/EZFIO.cfg | 7 - src/ao_two_e_ints/two_e_integrals.irp.f | 195 +- src/cosgtos_ao_int/EZFIO.cfg | 19 + src/cosgtos_ao_int/README.rst | 4 + src/cosgtos_ao_int/aos_cosgtos.irp.f | 210 +++ src/cosgtos_ao_int/cosgtos_ao_int.irp.f | 7 + .../gauss_legendre.irp.f | 0 src/cosgtos_ao_int/one_e_Coul_integrals.irp.f | 535 ++++++ src/cosgtos_ao_int/one_e_kin_integrals.irp.f | 223 +++ src/cosgtos_ao_int/two_e_Coul_integrals.irp.f | 1584 +++++++++++++++++ src/utils/cgtos_one_e.irp.f | 120 ++ src/utils/cgtos_utils.irp.f | 780 ++++++++ src/utils/cpx_erf.irp.f | 204 +++ src/utils/integration.irp.f | 135 +- src/utils/one_e_integration.irp.f | 72 +- src/utils/util.irp.f | 27 + 22 files changed, 4591 insertions(+), 674 deletions(-) create mode 100644 src/cosgtos_ao_int/EZFIO.cfg create mode 100644 src/cosgtos_ao_int/README.rst create mode 100644 src/cosgtos_ao_int/aos_cosgtos.irp.f create mode 100644 src/cosgtos_ao_int/cosgtos_ao_int.irp.f rename src/{ao_two_e_ints => cosgtos_ao_int}/gauss_legendre.irp.f (100%) create mode 100644 src/cosgtos_ao_int/one_e_Coul_integrals.irp.f create mode 100644 src/cosgtos_ao_int/one_e_kin_integrals.irp.f create mode 100644 src/cosgtos_ao_int/two_e_Coul_integrals.irp.f create mode 100644 src/utils/cgtos_one_e.irp.f create mode 100644 src/utils/cgtos_utils.irp.f create mode 100644 src/utils/cpx_erf.irp.f diff --git a/src/ao_basis/aos_in_r.irp.f b/src/ao_basis/aos_in_r.irp.f index 902827eb..7fcb980a 100644 --- a/src/ao_basis/aos_in_r.irp.f +++ b/src/ao_basis/aos_in_r.irp.f @@ -12,21 +12,21 @@ double precision function ao_value(i,r) integer :: power_ao(3) double precision :: accu,dx,dy,dz,r2 num_ao = ao_nucl(i) -! power_ao(1:3)= ao_power(i,1:3) -! center_ao(1:3) = nucl_coord(num_ao,1:3) -! dx = (r(1) - center_ao(1)) -! dy = (r(2) - center_ao(2)) -! dz = (r(3) - center_ao(3)) -! r2 = dx*dx + dy*dy + dz*dz -! dx = dx**power_ao(1) -! dy = dy**power_ao(2) -! dz = dz**power_ao(3) + power_ao(1:3)= ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = (r(1) - center_ao(1)) + dy = (r(2) - center_ao(2)) + dz = (r(3) - center_ao(3)) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) accu = 0.d0 -! do m=1,ao_prim_num(i) -! beta = ao_expo_ordered_transp(m,i) -! accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) -! enddo + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) + enddo ao_value = accu * dx * dy * dz end diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index 61d23b1e..b9caaf5d 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,2 +1,3 @@ ao_basis pseudo +cosgtos_ao_int diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f index d9061d67..597eb32a 100644 --- a/src/ao_one_e_ints/ao_overlap.irp.f +++ b/src/ao_one_e_ints/ao_overlap.irp.f @@ -1,75 +1,99 @@ - BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num,ao_num) ] - implicit none + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap , (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_z, (ao_num, ao_num) ] + BEGIN_DOC -! Overlap between atomic basis functions: -! -! :math:`\int \chi_i(r) \chi_j(r) dr` + ! Overlap between atomic basis functions: + ! + ! :math:`\int \chi_i(r) \chi_j(r) dr` END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) double precision :: overlap, overlap_x, overlap_y, overlap_z double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) - ao_overlap = 0.d0 + + ao_overlap = 0.d0 ao_overlap_x = 0.d0 ao_overlap_y = 0.d0 ao_overlap_z = 0.d0 - if (read_ao_integrals_overlap) then - call ezfio_get_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) - print *, 'AO overlap integrals read from disk' + + if(read_ao_integrals_overlap) then + + call ezfio_get_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) + print *, 'AO overlap integrals read from disk' + else - dim1=100 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - do i= 1,ao_num - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) - ao_overlap(i,j) += c * overlap - if(isnan(ao_overlap(i,j)))then - print*,'i,j',i,j - print*,'l,n',l,n - print*,'c,overlap',c,overlap - print*,overlap_x,overlap_y,overlap_z - stop - endif - ao_overlap_x(i,j) += c * overlap_x - ao_overlap_y(i,j) += c * overlap_y - ao_overlap_z(i,j) += c * overlap_z + if(use_cosgtos) then + !print*, ' use_cosgtos for ao_overlap ?', use_cosgtos + + do j = 1, ao_num + do i = 1, ao_num + ao_overlap (i,j) = ao_overlap_cosgtos (i,j) + ao_overlap_x(i,j) = ao_overlap_cosgtos_x(i,j) + ao_overlap_y(i,j) = ao_overlap_cosgtos_y(i,j) + ao_overlap_z(i,j) = ao_overlap_cosgtos_z(i,j) + enddo + enddo + + else + + dim1=100 + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_x,overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + do i= 1,ao_num + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + ao_overlap(i,j) += c * overlap + if(isnan(ao_overlap(i,j)))then + print*,'i,j',i,j + print*,'l,n',l,n + print*,'c,overlap',c,overlap + print*,overlap_x,overlap_y,overlap_z + stop + endif + ao_overlap_x(i,j) += c * overlap_x + ao_overlap_y(i,j) += c * overlap_y + ao_overlap_z(i,j) += c * overlap_z + enddo + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + endif + endif + if (write_ao_integrals_overlap) then call ezfio_set_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) print *, 'AO overlap integrals written to disk' @@ -77,6 +101,8 @@ END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ] implicit none BEGIN_DOC @@ -85,6 +111,8 @@ BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ] ao_overlap_imag = 0.d0 END_PROVIDER +! --- + BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ] implicit none BEGIN_DOC @@ -98,41 +126,43 @@ BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ] enddo END_PROVIDER +! --- +BEGIN_PROVIDER [ double precision, ao_overlap_abs, (ao_num, ao_num) ] - -BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] - implicit none BEGIN_DOC -! Overlap between absolute values of atomic basis functions: -! -! :math:`\int |\chi_i(r)| |\chi_j(r)| dr` + ! Overlap between absolute values of atomic basis functions: + ! + ! :math:`\int |\chi_i(r)| |\chi_j(r)| dr` END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 - double precision :: overlap, overlap_x, overlap_y, overlap_z + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: overlap_x, overlap_y, overlap_z double precision :: alpha, beta double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) double precision :: lower_exp_val, dx - if (is_periodic) then - do j=1,ao_num - do i= 1,ao_num - ao_overlap_abs(i,j)= cdabs(ao_overlap_complex(i,j)) + + if(is_periodic) then + + do j = 1, ao_num + do i = 1, ao_num + ao_overlap_abs(i,j) = cdabs(ao_overlap_complex(i,j)) enddo enddo + else + dim1=100 lower_exp_val = 40.d0 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B, & - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,dx) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_abs,ao_num,ao_coef_normalized_ordered_transp,ao_nucl,& - !$OMP ao_expo_ordered_transp,dim1,lower_exp_val) + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B, & + !$OMP overlap_x,overlap_y, overlap_z, & + !$OMP alpha, beta,i,j,dx) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_abs,ao_num,ao_coef_normalized_ordered_transp,ao_nucl,& + !$OMP ao_expo_ordered_transp,dim1,lower_exp_val) do j=1,ao_num A_center(1) = nucl_coord( ao_nucl(j), 1 ) A_center(2) = nucl_coord( ao_nucl(j), 2 ) @@ -160,10 +190,14 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] enddo enddo enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, S_inv,(ao_num,ao_num) ] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f index 4f117deb..a5ee0670 100644 --- a/src/ao_one_e_ints/kin_ao_ints.irp.f +++ b/src/ao_one_e_ints/kin_ao_ints.irp.f @@ -1,7 +1,10 @@ - BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num,ao_num) ] - implicit none + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_z, (ao_num, ao_num) ] + BEGIN_DOC ! Second derivative matrix elements in the |AO| basis. ! @@ -11,114 +14,131 @@ ! \langle \chi_i(x,y,z) | \frac{\partial^2}{\partial x^2} |\chi_j (x,y,z) \rangle ! END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) double precision :: overlap, overlap_y, overlap_z double precision :: overlap_x0, overlap_y0, overlap_z0 double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) double precision :: d_a_2,d_2 - dim1=100 - ! -- Dummy call to provide everything - A_center(:) = 0.d0 - B_center(:) = 1.d0 - alpha = 1.d0 - beta = .1d0 - power_A = 1 - power_B = 0 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) - ! -- + if(use_cosgtos) then + !print*, 'use_cosgtos for ao_kinetic_integrals ?', use_cosgtos - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & - !$OMP overlap_x0,overlap_y0,overlap_z0) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - do i= 1,ao_num - ao_deriv2_x(i,j)= 0.d0 - ao_deriv2_y(i,j)= 0.d0 - ao_deriv2_z(i,j)= 0.d0 - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + do j = 1, ao_num + do i = 1, ao_num + ao_deriv2_x(i,j) = ao_deriv2_cosgtos_x(i,j) + ao_deriv2_y(i,j) = ao_deriv2_cosgtos_y(i,j) + ao_deriv2_z(i,j) = ao_deriv2_cosgtos_z(i,j) + enddo + enddo - power_A(1) = power_A(1)-2 - if (power_A(1)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_a_2,overlap_y,overlap_z,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(1) = power_A(1)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_2,overlap_y,overlap_z,overlap,dim1) - power_A(1) = power_A(1)-2 + else - double precision :: deriv_tmp - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(1) +1.d0) * overlap_x0 & - +power_A(1) * (power_A(1)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_y0*overlap_z0 + dim1=100 - ao_deriv2_x(i,j) += c*deriv_tmp - power_A(2) = power_A(2)-2 - if (power_A(2)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(2) = power_A(2)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_2,overlap_z,overlap,dim1) - power_A(2) = power_A(2)-2 + ! -- Dummy call to provide everything + A_center(:) = 0.d0 + B_center(:) = 1.d0 + alpha = 1.d0 + beta = .1d0 + power_A = 1 + power_B = 0 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) + ! -- - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(2) +1.d0 ) * overlap_y0 & - +power_A(2) * (power_A(2)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_z0 - ao_deriv2_y(i,j) += c*deriv_tmp + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & + !$OMP overlap_x0,overlap_y0,overlap_z0) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + do i= 1,ao_num + ao_deriv2_x(i,j)= 0.d0 + ao_deriv2_y(i,j)= 0.d0 + ao_deriv2_z(i,j)= 0.d0 + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) - power_A(3) = power_A(3)-2 - if (power_A(3)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_a_2,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(3) = power_A(3)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_2,overlap,dim1) - power_A(3) = power_A(3)-2 + power_A(1) = power_A(1)-2 + if (power_A(1)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_a_2,overlap_y,overlap_z,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(1) = power_A(1)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_2,overlap_y,overlap_z,overlap,dim1) + power_A(1) = power_A(1)-2 - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(3) +1.d0 ) * overlap_z0 & - +power_A(3) * (power_A(3)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_y0 - ao_deriv2_z(i,j) += c*deriv_tmp + double precision :: deriv_tmp + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(1) +1.d0) * overlap_x0 & + +power_A(1) * (power_A(1)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_y0*overlap_z0 + ao_deriv2_x(i,j) += c*deriv_tmp + power_A(2) = power_A(2)-2 + if (power_A(2)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(2) = power_A(2)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_2,overlap_z,overlap,dim1) + power_A(2) = power_A(2)-2 + + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(2) +1.d0 ) * overlap_y0 & + +power_A(2) * (power_A(2)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_z0 + ao_deriv2_y(i,j) += c*deriv_tmp + + power_A(3) = power_A(3)-2 + if (power_A(3)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_a_2,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(3) = power_A(3)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_2,overlap,dim1) + power_A(3) = power_A(3)-2 + + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(3) +1.d0 ) * overlap_z0 & + +power_A(3) * (power_A(3)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_y0 + ao_deriv2_z(i,j) += c*deriv_tmp + + enddo + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + endif END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, ao_kinetic_integrals, (ao_num,ao_num)] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index c4a573be..dddf98d4 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -1,3 +1,6 @@ + +! --- + subroutine give_all_erf_kl_ao(integrals_ao,mu_in,C_center) implicit none BEGIN_DOC @@ -15,36 +18,104 @@ subroutine give_all_erf_kl_ao(integrals_ao,mu_in,C_center) enddo end +! --- + +double precision function NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) -double precision function NAI_pol_mult_erf_ao(i_ao,j_ao,mu_in,C_center) - implicit none BEGIN_DOC + ! ! Computes the following integral : - ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! END_DOC - integer, intent(in) :: i_ao,j_ao + + implicit none + integer, intent(in) :: i_ao, j_ao double precision, intent(in) :: mu_in, C_center(3) - integer :: i,j,num_A,num_B, power_A(3), power_B(3), n_pt_in - double precision :: A_center(3), B_center(3),integral, alpha,beta + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in + double precision :: A_center(3), B_center(3), integral, alpha, beta + double precision :: NAI_pol_mult_erf - num_A = ao_nucl(i_ao) - power_A(1:3)= ao_power(i_ao,1:3) + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - num_B = ao_nucl(j_ao) - power_B(1:3)= ao_power(j_ao,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) B_center(1:3) = nucl_coord(num_B,1:3) + n_pt_in = n_pt_max_integrals + NAI_pol_mult_erf_ao = 0.d0 do i = 1, ao_prim_num(i_ao) alpha = ao_expo_ordered_transp(i,i_ao) do j = 1, ao_prim_num(j_ao) beta = ao_expo_ordered_transp(j,j_ao) - integral = NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in) - NAI_pol_mult_erf_ao += integral * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao) + + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in,mu_in) + + NAI_pol_mult_erf_ao += integral * ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) enddo enddo -end +end function NAI_pol_mult_erf_ao + +! --- + +double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + implicit none + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3) + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, power_A1(3), power_A2(3), n_pt_in + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral + + double precision, external :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) + return + endif + + power_A1(1:3) = ao_power(i_ao,1:3) + power_A2(1:3) = ao_power(j_ao,1:3) + + A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + NAI_pol_mult_erf_ao_with1s = 0.d0 + do i = 1, ao_prim_num(i_ao) + alpha1 = ao_expo_ordered_transp (i,i_ao) + coef1 = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alpha2 = ao_expo_ordered_transp(j,j_ao) + coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao) + if(dabs(coef12) .lt. 1d-14) cycle + + integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & + , beta, B_center, C_center, n_pt_in, mu_in ) + + NAI_pol_mult_erf_ao_with1s += integral * coef12 + enddo + enddo + +end function NAI_pol_mult_erf_ao_with1s + +! --- double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) @@ -127,58 +198,221 @@ end function NAI_pol_mult_erf ! --- - -double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center) +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! ! Computes the following integral : - ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + ! .. math:: + ! + ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. ! END_DOC + include 'utils/constants.include.F' + implicit none - integer, intent(in) :: i_ao, j_ao - double precision, intent(in) :: beta, B_center(3) - double precision, intent(in) :: mu_in, C_center(3) - integer :: i, j, power_A1(3), power_A2(3), n_pt_in - double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral + integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(in) :: C_center(LD_C,3) + double precision, intent(out) :: res_v(LD_resv) - double precision, external :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new, p_new2, coef_tmp - ASSERT(beta .ge. 0.d0) - if(beta .lt. 1d-10) then - NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) + double precision :: rint + + res_V(1:LD_resv) = 0.d0 + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = p * p_new2 + + dist = 0.d0 + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + enddo + + const_factor = dist * rho + if(const_factor > 80.d0) then + return + endif + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + + if(n_pt == 0) then + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + res_v(ipoint) = coeff * rint(0, const) + enddo + + else + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff + enddo + + endif + +end subroutine NAI_pol_mult_erf_v + +! --- + +double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & + , beta, B_center, C_center, n_pt_in, mu_in ) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math:: + ! + ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) + ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) + ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) + ! \exp(-\beta (r - B)^2) + ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + + integer :: i, n_pt, n_pt_out + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new + + double precision :: rint + + + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} + alpha12 = alpha1 + alpha2 + alpha12_inv = 1.d0 / alpha12 + alpha12_inv_2 = 0.5d0 * alpha12_inv + rho12 = alpha1 * alpha2 * alpha12_inv + A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv + A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv + A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv + dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) & + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) & + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + const_factor12 = dist12 * rho12 + if(const_factor12 > 80.d0) then + NAI_pol_mult_erf_with1s = 0.d0 return endif - power_A1(1:3) = ao_power(i_ao,1:3) - power_A2(1:3) = ao_power(j_ao,1:3) + ! --- - A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) - A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv + dist = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) & + + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) & + + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3)) - n_pt_in = n_pt_max_integrals + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) then + NAI_pol_mult_erf_with1s = 0.d0 + return + endif - NAI_pol_mult_erf_ao_with1s = 0.d0 - do i = 1, ao_prim_num(i_ao) - alpha1 = ao_expo_ordered_transp (i,i_ao) - coef1 = ao_coef_normalized_ordered_transp(i,i_ao) + dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) & + + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) & + + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3)) - do j = 1, ao_prim_num(j_ao) - alpha2 = ao_expo_ordered_transp(j,j_ao) - coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao) - if(dabs(coef12) .lt. 1d-14) cycle + ! --- - integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & - , beta, B_center, C_center, n_pt_in, mu_in ) + p_new = mu_in / dsqrt(p + mu_in * mu_in) + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new - NAI_pol_mult_erf_ao_with1s += integral * coef12 - enddo + n_pt = 2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) ) + const = p * dist_integral * p_new * p_new + if(n_pt == 0) then + NAI_pol_mult_erf_with1s = coeff * rint(0, const) + return + endif + + do i = 0, n_pt_in + d(i) = 0.d0 enddo + p_new = p_new * p_new + call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) -end function NAI_pol_mult_erf_ao_with1s + if(n_pt_out < 0) then + NAI_pol_mult_erf_with1s = 0.d0 + return + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + NAI_pol_mult_erf_with1s = accu * coeff + +end function NAI_pol_mult_erf_with1s + +! --- subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) @@ -428,107 +662,6 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- -subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) - - BEGIN_DOC - ! - ! Computes the following integral : - ! - ! .. math:: - ! - ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) - ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. - ! - END_DOC - - include 'utils/constants.include.F' - - implicit none - - integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv - integer, intent(in) :: power_A(3), power_B(3) - double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in - double precision, intent(in) :: C_center(LD_C,3) - double precision, intent(out) :: res_v(LD_resv) - - integer :: i, n_pt, n_pt_out, ipoint - double precision :: P_center(3) - double precision :: d(0:n_pt_in), coeff, dist, const, factor - double precision :: const_factor, dist_integral - double precision :: accu, p_inv, p, rho, p_inv_2 - double precision :: p_new, p_new2, coef_tmp - - double precision :: rint - - res_V(1:LD_resv) = 0.d0 - - p = alpha + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) - p_new2 = p_new * p_new - coef_tmp = p * p_new2 - - dist = 0.d0 - do i = 1, 3 - P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv - dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) - enddo - - const_factor = dist * rho - if(const_factor > 80.d0) then - return - endif - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) - - if(n_pt == 0) then - - do ipoint = 1, n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const = coef_tmp * dist_integral - - res_v(ipoint) = coeff * rint(0, const) - enddo - - else - - do ipoint = 1, n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const = coef_tmp * dist_integral - - do i = 0, n_pt_in - d(i) = 0.d0 - enddo - call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) - - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) - enddo - - res_v(ipoint) = accu * coeff - enddo - - endif - -end subroutine NAI_pol_mult_erf_v - subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) @@ -659,113 +792,3 @@ subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,po end -double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & - , beta, B_center, C_center, n_pt_in, mu_in ) - - BEGIN_DOC - ! - ! Computes the following integral : - ! - ! .. math:: - ! - ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) - ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) - ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) - ! \exp(-\beta (r - B)^2) - ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. - ! - END_DOC - - include 'utils/constants.include.F' - - implicit none - integer, intent(in) :: n_pt_in - integer, intent(in) :: power_A1(3), power_A2(3) - double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3) - double precision, intent(in) :: alpha1, alpha2, beta, mu_in - - integer :: i, n_pt, n_pt_out - double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 - double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor - double precision :: dist_integral - double precision :: d(0:n_pt_in), coeff, const, factor - double precision :: accu - double precision :: p_new - - double precision :: rint - - - ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} - alpha12 = alpha1 + alpha2 - alpha12_inv = 1.d0 / alpha12 - alpha12_inv_2 = 0.5d0 * alpha12_inv - rho12 = alpha1 * alpha2 * alpha12_inv - A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv - A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv - A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv - dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) & - + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) & - + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) - - const_factor12 = dist12 * rho12 - if(const_factor12 > 80.d0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - ! --- - - ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} - p = alpha12 + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha12 * beta * p_inv - P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv - P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv - P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv - dist = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) & - + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) & - + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3)) - - const_factor = const_factor12 + dist * rho - if(const_factor > 80.d0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) & - + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) & - + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3)) - - ! --- - - p_new = mu_in / dsqrt(p + mu_in * mu_in) - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - n_pt = 2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) ) - const = p * dist_integral * p_new * p_new - if(n_pt == 0) then - NAI_pol_mult_erf_with1s = coeff * rint(0, const) - return - endif - - do i = 0, n_pt_in - d(i) = 0.d0 - enddo - p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) - - if(n_pt_out < 0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) - enddo - NAI_pol_mult_erf_with1s = accu * coeff - -end function NAI_pol_mult_erf_with1s diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 1d92dc7d..928053ad 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -1,4 +1,8 @@ + +! --- + BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] + BEGIN_DOC ! Nucleus-electron interaction, in the |AO| basis set. ! @@ -6,84 +10,100 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] ! ! These integrals also contain the pseudopotential integrals. END_DOC + implicit none - double precision :: alpha, beta, gama, delta - integer :: num_A,num_B - double precision :: A_center(3),B_center(3),C_center(3) - integer :: power_A(3),power_B(3) - integer :: i,j,k,l,n_pt_in,m - double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: alpha, beta + double precision :: A_center(3),B_center(3),C_center(3) + double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + + ao_integrals_n_e = 0.d0 if (read_ao_integrals_n_e) then + call ezfio_get_ao_one_e_ints_ao_integrals_n_e(ao_integrals_n_e) print *, 'AO N-e integrals read from disk' + else - ao_integrals_n_e = 0.d0 + if(use_cosgtos) then + !print *, " use_cosgtos for ao_integrals_n_e ?", use_cosgtos - ! _ - ! /| / |_) - ! | / | \ - ! + do j = 1, ao_num + do i = 1, ao_num + ao_integrals_n_e(i,j) = ao_integrals_n_e_cosgtos(i,j) + enddo + enddo - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& - !$OMP num_A,num_B,Z,c,n_pt_in) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& - !$OMP n_pt_max_integrals,ao_integrals_n_e,nucl_num,nucl_charge) + else - n_pt_in = n_pt_max_integrals + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,c1,n_pt_in) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP n_pt_max_integrals,ao_integrals_n_e,nucl_num,nucl_charge) - !$OMP DO SCHEDULE (dynamic) + n_pt_in = n_pt_max_integrals - do j = 1, ao_num - num_A = ao_nucl(j) - power_A(1:3)= ao_power(j,1:3) - A_center(1:3) = nucl_coord(num_A,1:3) + !$OMP DO SCHEDULE (dynamic) - do i = 1, ao_num + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3)= ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) - num_B = ao_nucl(i) - power_B(1:3)= ao_power(i,1:3) - B_center(1:3) = nucl_coord(num_B,1:3) + do i = 1, ao_num - do l=1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(l,j) + num_B = ao_nucl(i) + power_B(1:3)= ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) + do l=1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(l,j) - double precision :: c - c = 0.d0 + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) - do k = 1, nucl_num - double precision :: Z - Z = nucl_charge(k) + double precision :: c, c1 + c = 0.d0 - C_center(1:3) = nucl_coord(k,1:3) + do k = 1, nucl_num + double precision :: Z + Z = nucl_charge(k) - c = c - Z * NAI_pol_mult(A_center,B_center, & - power_A,power_B,alpha,beta,C_center,n_pt_in) + C_center(1:3) = nucl_coord(k,1:3) + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, alpha, beta + + c1 = NAI_pol_mult( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !print *, ' c1 = ', c1 + + c = c - Z * c1 + + enddo + ao_integrals_n_e(i,j) = ao_integrals_n_e(i,j) & + + ao_coef_normalized_ordered_transp(l,j) & + * ao_coef_normalized_ordered_transp(m,i) * c enddo - ao_integrals_n_e(i,j) = ao_integrals_n_e(i,j) & - + ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) * c enddo enddo enddo - enddo !$OMP END DO !$OMP END PARALLEL - IF (DO_PSEUDO) THEN + + endif + + + IF(do_pseudo) THEN ao_integrals_n_e += ao_pseudo_integrals ENDIF - IF(point_charges) THEN - ao_integrals_n_e += ao_integrals_pt_chrg - ENDIF - endif @@ -102,7 +122,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e_imag, (ao_num,ao_num)] ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` END_DOC implicit none - double precision :: alpha, beta, gama, delta + double precision :: alpha, beta integer :: num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) integer :: power_A(3),power_B(3) @@ -125,7 +145,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e_per_atom, (ao_num,ao_num,nuc ! :math:`\langle \chi_i | -\frac{1}{|r-R_A|} | \chi_j \rangle` END_DOC implicit none - double precision :: alpha, beta, gama, delta + double precision :: alpha, beta integer :: i_c,num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) integer :: power_A(3),power_B(3) @@ -268,6 +288,7 @@ double precision function NAI_pol_mult(A_center,B_center,power_A,power_B,alpha,b end +! --- subroutine give_polynomial_mult_center_one_e(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out) implicit none @@ -579,61 +600,3 @@ double precision function V_r(n,alpha) end -double precision function V_phi(n,m) - implicit none - BEGIN_DOC - ! Computes the angular $\phi$ part of the nuclear attraction integral: - ! - ! $\int_{0}^{2 \pi} \cos(\phi)^n \sin(\phi)^m d\phi$. - END_DOC - integer :: n,m, i - double precision :: prod, Wallis - prod = 1.d0 - do i = 0,shiftr(n,1)-1 - prod = prod/ (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) - enddo - V_phi = 4.d0 * prod * Wallis(m) -end - - -double precision function V_theta(n,m) - implicit none - BEGIN_DOC - ! Computes the angular $\theta$ part of the nuclear attraction integral: - ! - ! $\int_{0}^{\pi} \cos(\theta)^n \sin(\theta)^m d\theta$ - END_DOC - integer :: n,m,i - double precision :: Wallis, prod - include 'utils/constants.include.F' - V_theta = 0.d0 - prod = 1.d0 - do i = 0,shiftr(n,1)-1 - prod = prod / (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) - enddo - V_theta = (prod+prod) * Wallis(m) -end - - -double precision function Wallis(n) - implicit none - BEGIN_DOC - ! Wallis integral: - ! - ! $\int_{0}^{\pi} \cos(\theta)^n d\theta$. - END_DOC - double precision :: fact - integer :: n,p - include 'utils/constants.include.F' - if(iand(n,1).eq.0)then - Wallis = fact(shiftr(n,1)) - Wallis = pi * fact(n) / (dble(ibset(0_8,n)) * (Wallis+Wallis)*Wallis) - else - p = shiftr(n,1) - Wallis = fact(p) - Wallis = dble(ibset(0_8,p+p)) * Wallis*Wallis / fact(p+p+1) - endif - -end - - diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index b18c65d1..dfceddb5 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,13 +4,6 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None -[ao_integrals_threshold] -type: Threshold -doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero -interface: ezfio,provider,ocaml -default: 1.e-15 -ezfio_name: threshold_ao - [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 83fbadfd..82ffbc90 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -1,102 +1,123 @@ -double precision function ao_two_e_integral(i,j,k,l) - implicit none + +! --- + +double precision function ao_two_e_integral(i, j, k, l) + BEGIN_DOC ! integral of the AO basis or (ij|kl) ! i(r1) j(r1) 1/r12 k(r2) l(r2) END_DOC - integer,intent(in) :: i,j,k,l - integer :: p,q,r,s - double precision :: I_center(3),J_center(3),K_center(3),L_center(3) - integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) - double precision :: integral + implicit none include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) + integer :: iorder_p(3), iorder_q(3) + double precision :: I_center(3), J_center(3), K_center(3), L_center(3) + double precision :: integral double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq - integer :: iorder_p(3), iorder_q(3) + double precision :: ao_two_e_integral_schwartz_accel - if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then - ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) - else + double precision :: ao_two_e_integral_cosgtos - dim1 = n_pt_max_integrals - num_i = ao_nucl(i) - num_j = ao_nucl(j) - num_k = ao_nucl(k) - num_l = ao_nucl(l) - ao_two_e_integral = 0.d0 + if(use_cosgtos) then + !print *, ' use_cosgtos for ao_two_e_integral ?', use_cosgtos - if (num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k)then - do p = 1, 3 - I_power(p) = ao_power(i,p) - J_power(p) = ao_power(j,p) - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - I_center(p) = nucl_coord(num_i,p) - J_center(p) = nucl_coord(num_j,p) - K_center(p) = nucl_coord(num_k,p) - L_center(p) = nucl_coord(num_l,p) - enddo + ao_two_e_integral = ao_two_e_integral_cosgtos(i, j, k, l) - double precision :: coef1, coef2, coef3, coef4 - double precision :: p_inv,q_inv - double precision :: general_primitive_integral + else - do p = 1, ao_prim_num(i) - coef1 = ao_coef_normalized_ordered_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) - call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& - ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & - I_power,J_power,I_center,J_center,dim1) - p_inv = 1.d0/pp - do r = 1, ao_prim_num(k) - coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) - do s = 1, ao_prim_num(l) - coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) - call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& - ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l), & - K_power,L_power,K_center,L_center,dim1) - q_inv = 1.d0/qq - integral = general_primitive_integral(dim1, & - P_new,P_center,fact_p,pp,p_inv,iorder_p, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q) - ao_two_e_integral = ao_two_e_integral + coef4 * integral - enddo ! s - enddo ! r - enddo ! q - enddo ! p + if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then + + ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) else - do p = 1, 3 - I_power(p) = ao_power(i,p) - J_power(p) = ao_power(j,p) - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - enddo - double precision :: ERI + dim1 = n_pt_max_integrals - do p = 1, ao_prim_num(i) - coef1 = ao_coef_normalized_ordered_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) - do r = 1, ao_prim_num(k) - coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) - do s = 1, ao_prim_num(l) - coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) - integral = ERI( & - ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),& - I_power(1),J_power(1),K_power(1),L_power(1), & - I_power(2),J_power(2),K_power(2),L_power(2), & - I_power(3),J_power(3),K_power(3),L_power(3)) - ao_two_e_integral = ao_two_e_integral + coef4 * integral - enddo ! s - enddo ! r - enddo ! q - enddo ! p + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + ao_two_e_integral = 0.d0 + + if (num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k)then + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) + J_center(p) = nucl_coord(num_j,p) + K_center(p) = nucl_coord(num_k,p) + L_center(p) = nucl_coord(num_l,p) + enddo + + double precision :: coef1, coef2, coef3, coef4 + double precision :: p_inv,q_inv + double precision :: general_primitive_integral + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & + I_power,J_power,I_center,J_center,dim1) + p_inv = 1.d0/pp + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_prim_num(l) + coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) + call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& + ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l), & + K_power,L_power,K_center,L_center,dim1) + q_inv = 1.d0/qq + integral = general_primitive_integral(dim1, & + P_new,P_center,fact_p,pp,p_inv,iorder_p, & + Q_new,Q_center,fact_q,qq,q_inv,iorder_q) + ao_two_e_integral = ao_two_e_integral + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + double precision :: ERI + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_prim_num(l) + coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) + integral = ERI( & + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),& + I_power(1),J_power(1),K_power(1),L_power(1), & + I_power(2),J_power(2),K_power(2),L_power(2), & + I_power(3),J_power(3),K_power(3),L_power(3)) + ao_two_e_integral = ao_two_e_integral + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif endif @@ -104,6 +125,8 @@ double precision function ao_two_e_integral(i,j,k,l) end +! --- + double precision function ao_two_e_integral_schwartz_accel(i,j,k,l) implicit none BEGIN_DOC @@ -421,14 +444,17 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ] END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num) ] - implicit none +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) ] + BEGIN_DOC ! Needed to compute Schwartz inequalities END_DOC - integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + implicit none + integer :: i, k + double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 ao_two_e_integral_schwartz(1,1) = ao_two_e_integral(1,1,1,1) !$OMP PARALLEL DO PRIVATE(i,k) & @@ -445,6 +471,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num) ] END_PROVIDER +! --- double precision function general_primitive_integral(dim, & P_new,P_center,fact_p,p,p_inv,iorder_p, & diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg new file mode 100644 index 00000000..8edeecd0 --- /dev/null +++ b/src/cosgtos_ao_int/EZFIO.cfg @@ -0,0 +1,19 @@ +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio,provider,ocaml +default: False + +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst new file mode 100644 index 00000000..01f25d6d --- /dev/null +++ b/src/cosgtos_ao_int/README.rst @@ -0,0 +1,4 @@ +============== +cosgtos_ao_int +============== + diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/cosgtos_ao_int/aos_cosgtos.irp.f new file mode 100644 index 00000000..6a4d54fd --- /dev/null +++ b/src/cosgtos_ao_int/aos_cosgtos.irp.f @@ -0,0 +1,210 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_coef_norm_ord_transp_cosgtos(i,j) = ao_coef_norm_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ complex*16, ao_expo_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_expo_ord_transp_cosgtos(i,j) = ao_expo_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + + integer :: i, j, powA(3), nz + double precision :: norm + complex*16 :: overlap_x, overlap_y, overlap_z, C_A(3) + complex*16 :: integ1, integ2, expo + + nz = 100 + + C_A(1) = (0.d0, 0.d0) + C_A(2) = (0.d0, 0.d0) + C_A(3) = (0.d0, 0.d0) + + ao_coef_norm_cosgtos = 0.d0 + + do i = 1, ao_num + + powA(1) = ao_power(i,1) + powA(2) = ao_power(i,2) + powA(3) = ao_power(i,3) + + ! Normalization of the primitives + if(primitives_normalized) then + + do j = 1, ao_prim_num(i) + + expo = ao_expo(i,j) + (0.d0, 1.d0) * ao_expoim_cosgtos(i,j) + + call overlap_cgaussian_xyz(C_A, C_A, expo, expo, powA, powA, overlap_x, overlap_y, overlap_z, integ1, nz) + call overlap_cgaussian_xyz(C_A, C_A, conjg(expo), expo, powA, powA, overlap_x, overlap_y, overlap_z, integ2, nz) + + norm = 2.d0 * real( integ1 + integ2 ) + + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) / dsqrt(norm) + enddo + + else + + do j = 1, ao_prim_num(i) + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) + enddo + + endif + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_cosgtos, (ao_num, ao_prim_num_max) ] +&BEGIN_PROVIDER [ complex*16 , ao_expo_ord_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + integer :: i, j + integer :: iorder(ao_prim_num_max) + double precision :: d(ao_prim_num_max,3) + + d = 0.d0 + + do i = 1, ao_num + + do j = 1, ao_prim_num(i) + iorder(j) = j + d(j,1) = ao_expo(i,j) + d(j,2) = ao_coef_norm_cosgtos(i,j) + d(j,3) = ao_expoim_cosgtos(i,j) + enddo + + call dsort (d(1,1), iorder, ao_prim_num(i)) + call dset_order(d(1,2), iorder, ao_prim_num(i)) + call dset_order(d(1,3), iorder, ao_prim_num(i)) + + do j = 1, ao_prim_num(i) + ao_expo_ord_cosgtos (i,j) = d(j,1) + (0.d0, 1.d0) * d(j,3) + ao_coef_norm_ord_cosgtos(i,j) = d(j,2) + enddo + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, overlap, overlap_x, overlap_y, overlap_z + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap1, overlap_x1, overlap_y1, overlap_z1 + complex*16 :: overlap2, overlap_x2, overlap_y2, overlap_z2 + + ao_overlap_cosgtos = 0.d0 + ao_overlap_cosgtos_x = 0.d0 + ao_overlap_cosgtos_y = 0.d0 + ao_overlap_cosgtos_z = 0.d0 + + dim1 = 100 + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, n, l, c & + !$OMP , overlap_x , overlap_y , overlap_z , overlap & + !$OMP , overlap_x1, overlap_y1, overlap_z1, overlap1 & + !$OMP , overlap_x2, overlap_y2, overlap_z2, overlap2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_overlap_cosgtos_x, ao_overlap_cosgtos_y, ao_overlap_cosgtos_z, ao_overlap_cosgtos & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos ) + + do j = 1, ao_num + + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x1, overlap_y1, overlap_z1, overlap1, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, conjg(alpha), beta, power_A, power_B & + , overlap_x2, overlap_y2, overlap_z2, overlap2, dim1 ) + + overlap_x = 2.d0 * real( overlap_x1 + overlap_x2 ) + overlap_y = 2.d0 * real( overlap_y1 + overlap_y2 ) + overlap_z = 2.d0 * real( overlap_z1 + overlap_z2 ) + overlap = 2.d0 * real( overlap1 + overlap2 ) + + ao_overlap_cosgtos(i,j) = ao_overlap_cosgtos(i,j) + c * overlap + + if( isnan(ao_overlap_cosgtos(i,j)) ) then + print*,'i, j', i, j + print*,'l, n', l, n + print*,'c, overlap', c, overlap + print*, overlap_x, overlap_y, overlap_z + stop + endif + + ao_overlap_cosgtos_x(i,j) = ao_overlap_cosgtos_x(i,j) + c * overlap_x + ao_overlap_cosgtos_y(i,j) = ao_overlap_cosgtos_y(i,j) + c * overlap_y + ao_overlap_cosgtos_z(i,j) = ao_overlap_cosgtos_z(i,j) + c * overlap_z + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + + + diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f new file mode 100644 index 00000000..d65dfba5 --- /dev/null +++ b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f @@ -0,0 +1,7 @@ +program cosgtos_ao_int + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/src/ao_two_e_ints/gauss_legendre.irp.f b/src/cosgtos_ao_int/gauss_legendre.irp.f similarity index 100% rename from src/ao_two_e_ints/gauss_legendre.irp.f rename to src/cosgtos_ao_int/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f new file mode 100644 index 00000000..7f94f226 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f @@ -0,0 +1,535 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_integrals_n_e_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Nucleus-electron interaction, in the cosgtos |AO| basis set. + ! + ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` + ! + END_DOC + + implicit none + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: c, Z, A_center(3), B_center(3), C_center(3) + complex*16 :: alpha, beta, c1, c2 + + complex*16 :: NAI_pol_mult_cosgtos + + ao_integrals_n_e_cosgtos = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE ( i, j, k, l, m, alpha, beta, A_center, B_center, C_center & + !$OMP , power_A, power_B, num_A, num_B, Z, c, c1, c2, n_pt_in ) & + !$OMP SHARED ( ao_num, ao_prim_num, ao_nucl, nucl_coord, ao_power, nucl_num, nucl_charge & + !$OMP , ao_expo_ord_transp_cosgtos, ao_coef_norm_ord_transp_cosgtos & + !$OMP , n_pt_max_integrals, ao_integrals_n_e_cosgtos ) + + n_pt_in = n_pt_max_integrals + + !$OMP DO SCHEDULE (dynamic) + + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3) = ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + num_B = ao_nucl(i) + power_B(1:3) = ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(l,j) + + do m = 1, ao_prim_num(i) + beta = ao_expo_ord_transp_cosgtos(m,i) + + c = 0.d0 + do k = 1, nucl_num + + Z = nucl_charge(k) + + C_center(1:3) = nucl_coord(k,1:3) + + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, real(alpha), real(beta) + + c1 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !c2 = c1 + c2 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , conjg(alpha), beta, C_center, n_pt_in ) + + !print *, ' c1 = ', real(c1) + !print *, ' c2 = ', real(c2) + + c = c - Z * 2.d0 * real(c1 + c2) + + enddo + ao_integrals_n_e_cosgtos(i,j) = ao_integrals_n_e_cosgtos(i,j) & + + ao_coef_norm_ord_transp_cosgtos(l,j) & + * ao_coef_norm_ord_transp_cosgtos(m,i) * c + enddo + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + +! --- + +complex*16 function NAI_pol_mult_cosgtos(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in) + + BEGIN_DOC + ! + ! Computes the electron-nucleus attraction with two primitves cosgtos. + ! + ! :math:`\langle g_i | \frac{1}{|r-R_c|} | g_j \rangle` + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: C_center(3), A_center(3), B_center(3) + complex*16, intent(in) :: alpha, beta + + integer :: i, n_pt, n_pt_out + double precision :: dist, const_mod + complex*16 :: p, p_inv, rho, dist_integral, const, const_factor, coeff, factor + complex*16 :: accu, P_center(3) + complex*16 :: d(0:n_pt_in) + + complex*16 :: V_n_e_cosgtos + complex*16 :: crint + + if ( (A_center(1)/=B_center(1)) .or. (A_center(2)/=B_center(2)) .or. (A_center(3)/=B_center(3)) .or. & + (A_center(1)/=C_center(1)) .or. (A_center(2)/=C_center(2)) .or. (A_center(3)/=C_center(3)) ) then + + continue + + else + + NAI_pol_mult_cosgtos = V_n_e_cosgtos( power_A(1), power_A(2), power_A(3) & + , power_B(1), power_B(2), power_B(3) & + , alpha, beta ) + return + + endif + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + rho = alpha * beta * p_inv + + dist = 0.d0 + dist_integral = (0.d0, 0.d0) + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i)) + enddo + + const_factor = dist * rho + const = p * dist_integral + + const_mod = dsqrt(real(const_factor)*real(const_factor) + aimag(const_factor)*aimag(const_factor)) + if(const_mod > 80.d0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + factor = zexp(-const_factor) + coeff = dtwo_pi * factor * p_inv + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt = 2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) ) + if(n_pt == 0) then + NAI_pol_mult_cosgtos = coeff * crint(0, const) + return + endif + + call give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + if(n_pt_out < 0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + accu = (0.d0, 0.d0) + do i = 0, n_pt_out, 2 + accu += crint(shiftr(i, 1), const) * d(i) + +! print *, shiftr(i, 1), real(const), real(d(i)), real(crint(shiftr(i, 1), const)) + enddo + NAI_pol_mult_cosgtos = accu * coeff + +end function NAI_pol_mult_cosgtos + +! --- + +subroutine give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + BEGIN_DOC + ! Returns the explicit polynomial in terms of the "t" variable of the following + ! + ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. + END_DOC + + implicit none + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), C_center(3) + complex*16, intent(in) :: alpha, beta + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:n_pt_in) + + integer :: a_x, b_x, a_y, b_y, a_z, b_z + integer :: n_pt1, n_pt2, n_pt3, dim, i, n_pt_tmp + complex*16 :: p, P_center(3), rho, p_inv, p_inv_2 + complex*16 :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2) + complex*16 :: d1(0:n_pt_in), d2(0:n_pt_in), d3(0:n_pt_in) + + ASSERT (n_pt_in > 1) + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + p_inv_2 = 0.5d0 * p_inv + + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + enddo + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + d1(i) = (0.d0, 0.d0) + d2(i) = (0.d0, 0.d0) + d3(i) = (0.d0, 0.d0) + enddo + + ! --- + + n_pt1 = n_pt_in + + R1x(0) = (P_center(1) - A_center(1)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(1) - C_center(1)) + + R1xp(0) = (P_center(1) - B_center(1)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(1) - C_center(1)) + + R2x(0) = p_inv_2 + R2x(1) = (0.d0, 0.d0) + R2x(2) = -p_inv_2 + + a_x = power_A(1) + b_x = power_B(1) + call I_x1_pol_mult_one_e_cosgtos(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in) + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt2 = n_pt_in + + R1x(0) = (P_center(2) - A_center(2)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(2) - C_center(2)) + + R1xp(0) = (P_center(2) - B_center(2)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(2) - C_center(2)) + + a_y = power_A(2) + b_y = power_B(2) + call I_x1_pol_mult_one_e_cosgtos(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in) + + if(n_pt2 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt3 = n_pt_in + + R1x(0) = (P_center(3) - A_center(3)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(3) - C_center(3)) + + R1xp(0) = (P_center(3) - B_center(3)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(3) - C_center(3)) + + a_z = power_A(3) + b_z = power_B(3) + call I_x1_pol_mult_one_e_cosgtos(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in) + + if(n_pt3 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt_tmp = 0 + call multiply_cpoly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp) + do i = 0, n_pt_tmp + d1(i) = (0.d0, 0.d0) + enddo + + n_pt_out = 0 + call multiply_cpoly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out) + do i = 0, n_pt_out + d(i) = d1(i) + enddo + +end subroutine give_cpolynomial_mult_center_one_e + +! --- + +recursive subroutine I_x1_pol_mult_one_e_cosgtos(a, c, R1x, R1xp, R2x, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt_in + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:n_pt_in) + + integer :: nx, ix, dim, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + dim = n_pt_in + + if( (a==0) .and. (c==0)) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif( (c < 0) .or. (nd < 0) ) then + + nd = -1 + return + + elseif((a == 0) .and. (c .ne. 0)) then + + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, n_pt_in) + + elseif(a == 1) then + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x2_pol_mult_one_e_cosgtos(c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + else + + nx = 0 + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-2, c, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-1, c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x1_pol_mult_one_e_cosgtos(a-1, c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + endif + +end subroutine I_x1_pol_mult_one_e_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, dim) + + BEGIN_DOC + ! Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(out) :: d(0:max_dim) + + integer :: i, nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + if(c == 0) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif((nd < 0) .or. (c < 0)) then + + nd = -1 + return + + else + + nx = 0 + do ix = 0, dim + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-2, R1x, R1xp, R2x, X, nx, dim) + + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + do ix = 0, dim + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-1, R1x, R1xp, R2x, Y, ny, dim) + + if(ny .ge. 0) then + call multiply_cpoly(Y, ny, R1xp, 2, d, nd) + endif + + endif + +end subroutine I_x2_pol_mult_one_e_cosgtos + +! --- + +complex*16 function V_n_e_cosgtos(a_x, a_y, a_z, b_x, b_y, b_z, alpha, beta) + + BEGIN_DOC + ! Primitve nuclear attraction between the two primitves centered on the same atom. + ! + ! $p_1 = x^{a_x} y^{a_y} z^{a_z} \exp(-\alpha r^2)$ + ! + ! $p_2 = x^{b_x} y^{b_y} z^{b_z} \exp(-\beta r^2)$ + END_DOC + + implicit none + + integer, intent(in) :: a_x, a_y, a_z, b_x, b_y, b_z + complex*16, intent(in) :: alpha, beta + + double precision :: V_phi, V_theta + complex*16 :: V_r_cosgtos + + if( (iand(a_x + b_x, 1) == 1) .or. & + (iand(a_y + b_y, 1) == 1) .or. & + (iand(a_z + b_z, 1) == 1) ) then + + V_n_e_cosgtos = (0.d0, 0.d0) + + else + + V_n_e_cosgtos = V_r_cosgtos(a_x + b_x + a_y + b_y + a_z + b_z + 1, alpha + beta) & + * V_phi(a_x + b_x, a_y + b_y) & + * V_theta(a_z + b_z, a_x + b_x + a_y + b_y + 1) + endif + +end function V_n_e_cosgtos + +! --- + +complex*16 function V_r_cosgtos(n, alpha) + + BEGIN_DOC + ! Computes the radial part of the nuclear attraction integral: + ! + ! $\int_{0}^{\infty} r^n \exp(-\alpha r^2) dr$ + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer , intent(in) :: n + complex*16, intent(in) :: alpha + + double precision :: fact + + if(iand(n, 1) .eq. 1) then + V_r_cosgtos = 0.5d0 * fact(shiftr(n, 1)) / (alpha**(shiftr(n, 1) + 1)) + else + V_r_cosgtos = sqpi * fact(n) / fact(shiftr(n, 1)) * (0.5d0/zsqrt(alpha))**(n+1) + endif + +end function V_r_cosgtos + +! --- + diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f new file mode 100644 index 00000000..710b04d4 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f @@ -0,0 +1,223 @@ + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, deriv_tmp + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap_x, overlap_y, overlap_z, overlap + complex*16 :: overlap_x0_1, overlap_y0_1, overlap_z0_1 + complex*16 :: overlap_x0_2, overlap_y0_2, overlap_z0_2 + complex*16 :: overlap_m2_1, overlap_p2_1 + complex*16 :: overlap_m2_2, overlap_p2_2 + complex*16 :: deriv_tmp_1, deriv_tmp_2 + + + dim1 = 100 + + ! -- Dummy call to provide everything + + A_center(:) = (0.0d0, 0.d0) + B_center(:) = (1.0d0, 0.d0) + alpha = (1.0d0, 0.d0) + beta = (0.1d0, 0.d0) + power_A = 1 + power_B = 0 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + ! --- + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, l, n, c & + !$OMP , deriv_tmp, deriv_tmp_1, deriv_tmp_2 & + !$OMP , overlap_x, overlap_y, overlap_z, overlap & + !$OMP , overlap_m2_1, overlap_p2_1, overlap_m2_2, overlap_p2_2 & + !$OMP , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap_x0_2, overlap_y0_2, overlap_z0_2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos & + !$OMP , ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z ) + + do j = 1, ao_num + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + ao_deriv2_cosgtos_x(i,j) = 0.d0 + ao_deriv2_cosgtos_y(i,j) = 0.d0 + ao_deriv2_cosgtos_z(i,j) = 0.d0 + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x0_2, overlap_y0_2, overlap_z0_2, overlap, dim1 ) + + ! --- + + power_A(1) = power_A(1) - 2 + if(power_A(1) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_m2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_m2_2, overlap_y, overlap_z, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(1) = power_A(1) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_p2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_p2_2, overlap_y, overlap_z, overlap, dim1 ) + + power_A(1) = power_A(1) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_1 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_y0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_2 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_y0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_x(i,j) += c * deriv_tmp + + ! --- + + power_A(2) = power_A(2) - 2 + if(power_A(2) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_m2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_m2_2, overlap_y, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(2) = power_A(2) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_p2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_p2_2, overlap_y, overlap, dim1 ) + + power_A(2) = power_A(2) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_1 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_2 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_y(i,j) += c * deriv_tmp + + ! --- + + power_A(3) = power_A(3) - 2 + if(power_A(3) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_m2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_m2_2, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(3) = power_A(3) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_p2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_p2_2, overlap, dim1 ) + + power_A(3) = power_A(3) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_1 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_y0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_2 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_y0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_z(i,j) += c * deriv_tmp + + ! --- + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, ao_kinetic_integrals_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Kinetic energy integrals in the cosgtos |AO| basis. + ! + ! $\langle \chi_i |\hat{T}| \chi_j \rangle$ + ! + END_DOC + + implicit none + integer :: i, j + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i, j) & + !$OMP SHARED(ao_num, ao_kinetic_integrals_cosgtos, ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z) + do j = 1, ao_num + do i = 1, ao_num + ao_kinetic_integrals_cosgtos(i,j) = -0.5d0 * ( ao_deriv2_cosgtos_x(i,j) & + + ao_deriv2_cosgtos_y(i,j) & + + ao_deriv2_cosgtos_z(i,j) ) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f new file mode 100644 index 00000000..527a98d5 --- /dev/null +++ b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f @@ -0,0 +1,1584 @@ + +! --- + +double precision function ao_two_e_integral_cosgtos(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision :: ao_two_e_integral_cosgtos_schwartz_accel + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then + + !print *, ' with shwartz acc ' + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + else + !print *, ' without shwartz acc ' + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + ao_two_e_integral_cosgtos = 0.d0 + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + !print *, ' not the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + !integer :: ii + !do ii = 1, 3 + ! print *, 'fact_p1', fact_p1 + ! print *, 'fact_p2', fact_p2 + ! print *, 'fact_p3', fact_p3 + ! print *, 'fact_p4', fact_p4 + ! !print *, pp1, p1_inv + ! !print *, pp2, p2_inv + ! !print *, pp3, p3_inv + ! !print *, pp4, p4_inv + !enddo + ! if( abs(aimag(P1_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_1 is complex !!' + ! print *, P1_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P2_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_2 is complex !!' + ! print *, P2_center + ! print *, ' old expos:' + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! print *, ' new expo:' + ! print *, pp2, p2_inv + ! print *, ' factor:' + ! print *, fact_p2 + ! print *, ' old centers:' + ! print *, I_center, J_center + ! print *, ' powers:' + ! print *, I_power, J_power + ! stop + ! endif + ! if( abs(aimag(P3_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_3 is complex !!' + ! print *, P3_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P4_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_4 is complex !!' + ! print *, P4_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + !enddo + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + !do ii = 1, 3 + ! !print *, qq1, q1_inv + ! !print *, qq2, q2_inv + ! print *, 'fact_q1', fact_q1 + ! print *, 'fact_q2', fact_q2 + !enddo + ! if( abs(aimag(Q1_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_1 is complex !!' + ! print *, Q1_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + ! if( abs(aimag(Q2_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_2 is complex !!' + ! print *, Q2_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + !enddo + + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + !integral_tot = integral1 + !print*, integral_tot + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + !print *, ' the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + endif + +end function ao_two_e_integral_cosgtos + +! --- + +double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision, allocatable :: schwartz_kl(:,:) + double precision :: thr + double precision :: schwartz_ij + + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + + thr = ao_integrals_threshold*ao_integrals_threshold + + allocate( schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)) ) + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, K_power, L_power, K_center, L_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, K_power, L_power, K_center, L_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + + deallocate(schwartz_kl) + +end function ao_two_e_integral_cosgtos_schwartz_accel + +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] + + BEGIN_DOC + ! Needed to compute Schwartz inequalities + END_DOC + + implicit none + integer :: i, k + double precision :: ao_two_e_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP SCHEDULE(dynamic) + do i = 1, ao_num + do k = 1, i + ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +complex*16 function general_primitive_integral_cosgtos( dim, P_new, P_center, fact_p, p, p_inv, iorder_p & + , Q_new, Q_center, fact_q, q, q_inv, iorder_q ) + + BEGIN_DOC + ! + ! Computes the integral where p,q,r,s are cos-cGTOS primitives + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: iorder_p(3), iorder_q(3) + complex*16, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv + complex*16, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv + + integer :: i, j, nx, ny, nz, n_Ix, n_Iy, n_Iz, iorder, n_pt_tmp, n_pt_out + double precision :: tmp_mod + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: pq, pq_inv, pq_inv_2, p01_1, p01_2, p10_1, p10_2, ppq, sq_ppq + complex*16 :: rho, dist, const + complex*16 :: accu, tmp_p, tmp_q + complex*16 :: dx(0:max_dim), Ix_pol(0:max_dim), dy(0:max_dim), Iy_pol(0:max_dim), dz(0:max_dim), Iz_pol(0:max_dim) + complex*16 :: d1(0:max_dim), d_poly(0:max_dim) + + complex*16 :: crint_sum + + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly + + general_primitive_integral_cosgtos = (0.d0, 0.d0) + + pq = (0.5d0, 0.d0) * p_inv * q_inv + pq_inv = (0.5d0, 0.d0) / (p + q) + pq_inv_2 = pq_inv + pq_inv + p10_1 = q * pq ! 1/(2p) + p01_1 = p * pq ! 1/(2q) + p10_2 = pq_inv_2 * p10_1 * q ! 0.5d0*q/(pq + p*p) + p01_2 = pq_inv_2 * p01_1 * p ! 0.5d0*p/(q*q + pq) + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + ! --- + + iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1) + + do i = 0, iorder + Ix_pol(i) = (0.d0, 0.d0) + enddo + + n_Ix = 0 + do i = 0, iorder_p(1) + + tmp_p = P_new(i,1) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(1) + + tmp_q = tmp_p * Q_new(j,1) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(1), Q_center(1), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dx, nx, tmp_q, Ix_pol, n_Ix) + enddo + enddo + if(n_Ix == -1) then + return + endif + + ! --- + + iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2) + + do i = 0, iorder + Iy_pol(i) = (0.d0, 0.d0) + enddo + + n_Iy = 0 + do i = 0, iorder_p(2) + + tmp_p = P_new(i,2) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(2) + + tmp_q = tmp_p * Q_new(j,2) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(2), Q_center(2), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dy, ny, tmp_q, Iy_pol, n_Iy) + enddo + enddo + + if(n_Iy == -1) then + return + endif + + ! --- + + iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3) + + do i = 0, iorder + Iz_pol(i) = (0.d0, 0.d0) + enddo + + n_Iz = 0 + do i = 0, iorder_p(3) + + tmp_p = P_new(i,3) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(3) + + tmp_q = tmp_p * Q_new(j,3) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(3), Q_center(3), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dz, nz, tmp_q, Iz_pol, n_Iz) + enddo + enddo + + if(n_Iz == -1) then + return + endif + + ! --- + + rho = p * q * pq_inv_2 + dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) & + + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) & + + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3)) + const = dist * rho + + n_pt_tmp = n_Ix + n_Iy + do i = 0, n_pt_tmp + d_poly(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp) + if(n_pt_tmp == -1) then + return + endif + n_pt_out = n_pt_tmp + n_Iz + do i = 0, n_pt_out + d1(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out) + + accu = crint_sum(n_pt_out, const, d1) +! print *, n_pt_out, real(d1(0:n_pt_out)) +! print *, real(accu) + + general_primitive_integral_cosgtos = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / sq_ppq + +end function general_primitive_integral_cosgtos + +! --- + +complex*16 function ERI_cosgtos(alpha, beta, delta, gama, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z) + + BEGIN_DOC + ! ATOMIC PRIMTIVE two-electron integral between the 4 primitives :: + ! primitive_1 = x1**(a_x) y1**(a_y) z1**(a_z) exp(-alpha * r1**2) + ! primitive_2 = x1**(b_x) y1**(b_y) z1**(b_z) exp(- beta * r1**2) + ! primitive_3 = x2**(c_x) y2**(c_y) z2**(c_z) exp(-delta * r2**2) + ! primitive_4 = x2**(d_x) y2**(d_y) z2**(d_z) exp(- gama * r2**2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(in) :: delta, gama, alpha, beta + + integer :: a_x_2, b_x_2, c_x_2, d_x_2, a_y_2, b_y_2, c_y_2, d_y_2, a_z_2, b_z_2, c_z_2, d_z_2 + integer :: i, j, k, l, n_pt + integer :: nx, ny, nz + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: p, q, ppq, sq_ppq, coeff, I_f + + ERI_cosgtos = (0.d0, 0.d0) + + ASSERT (REAL(alpha) >= 0.d0) + ASSERT (REAL(beta ) >= 0.d0) + ASSERT (REAL(delta) >= 0.d0) + ASSERT (REAL(gama ) >= 0.d0) + + nx = a_x + b_x + c_x + d_x + if(iand(nx,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + ny = a_y + b_y + c_y + d_y + if(iand(ny,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + nz = a_z + b_z + c_z + d_z + if(iand(nz,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + n_pt = shiftl(nx+ny+nz, 1) + + p = alpha + beta + q = delta + gama + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + coeff = pi_5_2 / (p * q * sq_ppq) + if(n_pt == 0) then + ERI_cosgtos = coeff + return + endif + + call integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + ERI_cosgtos = I_f * coeff + +end function ERI_cosgtos + +! --- + +subroutine integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + BEGIN_DOC + ! Calculates the integral of the polynomial : + ! + ! $I_{x_1}(a_x+b_x, c_x+d_x, p, q) \, I_{x_1}(a_y+b_y, c_y+d_y, p, q) \, I_{x_1}(a_z+b_z, c_z+d_z, p, q)$ + ! in $( 0 ; 1)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(out) :: I_f + + integer :: i, j, ix, iy, iz, jx, jy, jz, sx, sy, sz + complex*16 :: p, q + complex*16 :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2 + complex*16 :: B00(n_pt_max_integrals), B10(n_pt_max_integrals), B01(n_pt_max_integrals) + complex*16 :: t1(n_pt_max_integrals), t2(n_pt_max_integrals) + + + ASSERT (n_pt > 1) + + j = shiftr(n_pt, 1) + + pq_inv = (0.5d0, 0.d0) / (p + q) + p10_1 = (0.5d0, 0.d0) / p + p01_1 = (0.5d0, 0.d0) / q + p10_2 = (0.5d0, 0.d0) * q /(p * q + p * p) + p01_2 = (0.5d0, 0.d0) * p /(q * q + q * p) + pq_inv_2 = pq_inv + pq_inv + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: t1, t2, B10, B01, B00 + ix = a_x + b_x + jx = c_x + d_x + iy = a_y + b_y + jy = c_y + d_y + iz = a_z + b_z + jz = c_z + d_z + sx = ix + jx + sy = iy + jy + sz = iz + jz + + do i = 1, n_pt + B10(i) = p10_1 - gauleg_t2(i, j) * p10_2 + B01(i) = p01_1 - gauleg_t2(i, j) * p01_2 + B00(i) = gauleg_t2(i, j) * pq_inv + enddo + + if(sx > 0) then + call I_x1_new_cosgtos(ix, jx, B10, B01, B00, t1, n_pt) + else + do i = 1, n_pt + t1(i) = (1.d0, 0.d0) + enddo + endif + + if(sy > 0) then + call I_x1_new_cosgtos(iy, jy, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + if(sz > 0) then + call I_x1_new_cosgtos(iz, jz, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + I_f = (0.d0, 0.d0) + do i = 1, n_pt + I_f += gauleg_w(i, j) * t1(i) + enddo + +end subroutine integrale_new_cosgtos + +! --- + +recursive subroutine I_x1_new_cosgtos(a, c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + complex*16 :: res2(n_pt_max_integrals) + + if(c < 0) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + else if (a == 0) then + + call I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + else if (a == 1) then + + call I_x2_new_cosgtos(c-1, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c) * B_00(i) * res(i) + enddo + + else + + call I_x1_new_cosgtos(a-2, c , B_10, B_01, B_00, res , n_pt) + call I_x1_new_cosgtos(a-1, c-1, B_10, B_01, B_00, res2, n_pt) + do i = 1, n_pt + res(i) = dble(a-1) * B_10(i) * res(i) + dble(c) * B_00(i) * res2(i) + enddo + + endif + +end subroutine I_x1_new_cosgtos + +! --- + +recursive subroutine I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + + if(c == 1) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + elseif(c == 0) then + + do i = 1, n_pt + res(i) = (1.d0, 0.d0) + enddo + + else + + call I_x1_new_cosgtos(0, c-2, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c-1) * B_01(i) * res(i) + enddo + + endif + +end subroutine I_x2_new_cosgtos + +! --- + +subroutine give_cpolynom_mult_center_x( P_center, Q_center, a_x, d_x, p, q, n_pt_in & + , pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, d, n_pt_out) + + BEGIN_DOC + ! subroutine that returns the explicit polynom in term of the "t" + ! variable of the following polynoms : + ! + ! $I_{x_1}(a_x,d_x,p,q) \, I_{x_1}(a_y,d_y,p,q) \ I_{x_1}(a_z,d_z,p,q)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a_x, d_x + complex*16, intent(in) :: P_center, Q_center, p, q, pq_inv, p10_1, p01_1, p10_2, p01_2, pq_inv_2 + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:max_dim) + + integer :: n_pt1, i + complex*16 :: B10(0:2), B01(0:2), B00(0:2), C00(0:2), D00(0:2) + + ASSERT (n_pt_in >= 0) + + B10(0) = p10_1 + B10(1) = (0.d0, 0.d0) + B10(2) = -p10_2 + + B01(0) = p01_1 + B01(1) = (0.d0, 0.d0) + B01(2) = -p01_2 + + B00(0) = (0.d0, 0.d0) + B00(1) = (0.d0, 0.d0) + B00(2) = pq_inv + + C00(0) = (0.d0, 0.d0) + C00(1) = (0.d0, 0.d0) + C00(2) = -q * (P_center - Q_center) * pq_inv_2 + + D00(0) = (0.d0, 0.d0) + D00(1) = (0.d0, 0.d0) + D00(2) = -p * (Q_center - P_center) * pq_inv_2 + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt1 = n_pt_in + + !DIR$ FORCEINLINE + call I_x1_pol_mult_cosgtos(a_x, d_x, B10, B01, B00, C00, D00, d, n_pt1, n_pt_in) + n_pt_out = n_pt1 + +! print *, ' ' +! print *, a_x, d_x +! print *, real(B10), real(B01), real(B00), real(C00), real(D00) +! print *, n_pt1, real(d(0:n_pt1)) +! print *, ' ' + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + +end subroutine give_cpolynom_mult_center_x + +! --- + +subroutine I_x1_pol_mult_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + if( (c >= 0) .and. (nd >= 0) ) then + + if(a == 1) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a == 2) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a > 2) then + call I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else ! a == 0 + + if(c == 0)then + nd = 0 + d(0) = (1.d0, 0.d0) + return + endif + + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + endif + + else + + nd = -1 + + endif + +end subroutine I_x1_pol_mult_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + ASSERT (a > 2) + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + if(a == 3) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + elseif(a == 4) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT (a >= 5) + call I_x1_pol_mult_recurs_cosgtos(a-2, c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + nx = nd + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + if(c > 0) then + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + endif + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + + ASSERT (a > 2) + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_recurs_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a1_cosgtos(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + if( (c < 0) .or. (nd < 0) ) then + nd = -1 + return + endif + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a1_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if (c>1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = 0.d0 + enddo + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a2_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, dim) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: i + integer :: nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + select case (c) + + case (0) + nd = 0 + d(0) = (1.d0, 0.d0) + return + + case (:-1) + nd = -1 + return + + case (1) + nd = 2 + d(0) = D_00(0) + d(1) = D_00(1) + d(2) = D_00(2) + return + + case (2) + nd = 2 + d(0) = B_01(0) + d(1) = B_01(1) + d(2) = B_01(2) + + ny = 2 + Y(0) = D_00(0) + Y(1) = D_00(1) + Y(2) = D_00(2) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + return + + case default + + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + X(ix) = (0.d0, 0.d0) + enddo + nx = 0 + call I_x2_pol_mult_cosgtos(c-2, B_10, B_01, B_00, C_00, D_00, X, nx, dim) + + !DIR$ LOOP COUNT(6) + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_01, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + Y(ix) = 0.d0 + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, Y, ny, dim) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + + end select + +end subroutine I_x2_pol_mult_cosgtos + +! --- + + diff --git a/src/utils/cgtos_one_e.irp.f b/src/utils/cgtos_one_e.irp.f new file mode 100644 index 00000000..43ca8224 --- /dev/null +++ b/src/utils/cgtos_one_e.irp.f @@ -0,0 +1,120 @@ + +! --- + +complex*16 function overlap_cgaussian_x(A_center, B_center, alpha, beta, power_A, power_B, dim) + + BEGIN_DOC + ! + ! \int_{-infty}^{+infty} (x-A_x)^ax (x-B_x)^bx exp(-alpha (x-A_x)^2) exp(- beta(x-B_X)^2) dx + ! with complex arguments + ! + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, power_A, power_B + complex*16, intent(in) :: A_center, B_center, alpha, beta + + integer :: i, iorder_p + double precision :: fact_p_mod + complex*16 :: P_new(0:max_dim), P_center, fact_p, p, inv_sq_p + + complex*16 :: Fc_integral + + + call give_explicit_cpoly_and_cgaussian_x( P_new, P_center, p, fact_p, iorder_p & + , alpha, beta, power_A, power_B, A_center, B_center, dim) + + fact_p_mod = dsqrt(real(fact_p)*real(fact_p) + aimag(fact_p)*aimag(fact_p)) + if(fact_p_mod .lt. 1.d-14) then + overlap_cgaussian_x = (0.d0, 0.d0) + return + endif + + + inv_sq_p = (1.d0, 0.d0) / zsqrt(p) + + overlap_cgaussian_x = (0.d0, 0.d0) + do i = 0, iorder_p + overlap_cgaussian_x += P_new(i) * Fc_integral(i, inv_sq_p) + enddo + + overlap_cgaussian_x *= fact_p + +end function overlap_cgaussian_x + +! --- + +subroutine overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_z, overlap, dim ) + + BEGIN_DOC + ! + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx + ! S = S_x S_y S_z + ! for complex arguments + ! + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, power_A(3), power_B(3) + complex*16, intent(in) :: A_center(3), B_center(3), alpha, beta + complex*16, intent(out) :: overlap_x, overlap_y, overlap_z, overlap + + integer :: i, nmax, iorder_p(3) + double precision :: fact_p_mod + complex*16 :: P_new(0:max_dim,3), P_center(3), fact_p, p, inv_sq_p + complex*16 :: F_integral_tab(0:max_dim) + + complex*16 :: Fc_integral + + call give_explicit_cpoly_and_cgaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + + fact_p_mod = dsqrt(real(fact_p)*real(fact_p) + aimag(fact_p)*aimag(fact_p)) + if(fact_p_mod .lt. 1.d-14) then + overlap_x = (1.d-10, 0.d0) + overlap_y = (1.d-10, 0.d0) + overlap_z = (1.d-10, 0.d0) + overlap = (1.d-10, 0.d0) + return + endif + + nmax = maxval(iorder_p) + + inv_sq_p = (1.d0, 0.d0) / zsqrt(p) + do i = 0, nmax + F_integral_tab(i) = Fc_integral(i, inv_sq_p) + enddo + + overlap_x = P_new(0,1) * F_integral_tab(0) + overlap_y = P_new(0,2) * F_integral_tab(0) + overlap_z = P_new(0,3) * F_integral_tab(0) + + do i = 1, iorder_p(1) + overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(1), beta, B_center(1), fact_p, p, P_center(1)) + overlap_x *= fact_p + + do i = 1, iorder_p(2) + overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(2), beta, B_center(2), fact_p, p, P_center(2)) + overlap_y *= fact_p + + do i = 1, iorder_p(3) + overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(3), beta, B_center(3), fact_p, p, P_center(3)) + overlap_z *= fact_p + + overlap = overlap_x * overlap_y * overlap_z + +end subroutine overlap_cgaussian_xyz + +! --- + + diff --git a/src/utils/cgtos_utils.irp.f b/src/utils/cgtos_utils.irp.f new file mode 100644 index 00000000..a820d5f2 --- /dev/null +++ b/src/utils/cgtos_utils.irp.f @@ -0,0 +1,780 @@ + +! --- + +subroutine give_explicit_cpoly_and_cgaussian_x(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim) + + BEGIN_DOC + ! Transform the product of + ! (x-x_A)^a (x-x_B)^b exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k \sum_{i=0}^{iorder} (x-x_P)^i exp(-p(r-P)^2) + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: a, b + complex*16, intent(in) :: alpha, beta, A_center, B_center + integer, intent(out) :: iorder + complex*16, intent(out) :: p, P_center, fact_k + complex*16, intent(out) :: P_new(0:max_dim) + + integer :: n_new, i, j + double precision :: tmp_mod + complex*16 :: P_a(0:max_dim), P_b(0:max_dim) + complex*16 :: p_inv, ab, d_AB, tmp + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b + + P_new = (0.d0, 0.d0) + + ! new exponent + p = alpha + beta + + ! new center + p_inv = (1.d0, 0.d0) / p + ab = alpha * beta + P_center = (alpha * A_center + beta * B_center) * p_inv + + ! get the factor + d_AB = (A_center - B_center) * (A_center - B_center) + tmp = ab * p_inv * d_AB + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(tmp_mod .lt. 50.d0) then + fact_k = zexp(-tmp) + else + fact_k = (0.d0, 0.d0) + endif + + ! Recenter the polynomials P_a and P_b on P_center + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0), A_center, P_center, a, P_b(0), B_center, P_center, b) + n_new = 0 + + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0), a, P_b(0), b, P_new(0), n_new) + iorder = a + b + +end subroutine give_explicit_cpoly_and_cgaussian_x + +! --- + +subroutine give_explicit_cpoly_and_cgaussian(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim) + + BEGIN_DOC + ! Transforms the product of + ! (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) + ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) + ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) + ! + ! WARNING ::: IF fact_k is too smal then: + ! returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, a(3), b(3) + complex*16, intent(in) :: alpha, beta, A_center(3), B_center(3) + integer, intent(out) :: iorder(3) + complex*16, intent(out) :: p, P_center(3), fact_k, P_new(0:max_dim,3) + + integer :: n_new, i, j + double precision :: tmp_mod + complex*16 :: P_a(0:max_dim,3), P_b(0:max_dim,3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b + + iorder(1) = 0 + iorder(2) = 0 + iorder(3) = 0 + + P_new(0,1) = (0.d0, 0.d0) + P_new(0,2) = (0.d0, 0.d0) + P_new(0,3) = (0.d0, 0.d0) + + !DIR$ FORCEINLINE + call cgaussian_product(alpha, A_center, beta, B_center, fact_k, p, P_center) + + ! IF fact_k is too smal then: returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + tmp_mod = dsqrt(REAL(fact_k)*REAL(fact_k) + AIMAG(fact_k)*AIMAG(fact_k)) + if(tmp_mod < 1d-14) then + iorder = 0 + p = (1.d+14, 0.d0) + fact_k = (0.d0 , 0.d0) + P_new(0:max_dim,1:3) = (0.d0 , 0.d0) + P_center(1:3) = (0.d0 , 0.d0) + return + endif + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,1), A_center(1), P_center(1), a(1), P_b(0,1), B_center(1), P_center(1), b(1)) + iorder(1) = a(1) + b(1) + do i = 0, iorder(1) + P_new(i,1) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,1), a(1), P_b(0,1), b(1), P_new(0,1), n_new) + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,2), A_center(2), P_center(2), a(2), P_b(0,2), B_center(2), P_center(2), b(2)) + iorder(2) = a(2) + b(2) + do i = 0, iorder(2) + P_new(i,2) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,2), a(2), P_b(0,2), b(2), P_new(0,2), n_new) + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,3), A_center(3), P_center(3), a(3), P_b(0,3), B_center(3), P_center(3), b(3)) + iorder(3) = a(3) + b(3) + do i = 0, iorder(3) + P_new(i,3) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,3), a(3), P_b(0,3), b(3), P_new(0,3), n_new) + +end subroutine give_explicit_cpoly_and_cgaussian + +! --- + +!subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) +! BEGIN_DOC +! ! Transforms the product of +! ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) +! ! exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) exp(-(r-Nucl_center)^2 gama +! ! +! ! into +! ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) +! ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) +! ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) +! END_DOC +! implicit none +! include 'constants.include.F' +! integer, intent(in) :: dim +! integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) +! double precision, intent(in) :: alpha, beta, gama ! exponents +! double precision, intent(in) :: A_center(3) ! A center +! double precision, intent(in) :: B_center (3) ! B center +! double precision, intent(in) :: Nucl_center(3) ! B center +! double precision, intent(out) :: P_center(3) ! new center +! double precision, intent(out) :: p ! new exponent +! double precision, intent(out) :: fact_k ! constant factor +! double precision, intent(out) :: P_new(0:max_dim,3)! polynomial +! integer , intent(out) :: iorder(3) ! i_order(i) = order of the polynomials +! +! double precision :: P_center_tmp(3) ! new center +! double precision :: p_tmp ! new exponent +! double precision :: fact_k_tmp,fact_k_bis ! constant factor +! double precision :: P_new_tmp(0:max_dim,3)! polynomial +! integer :: i,j +! double precision :: binom_func +! +! ! First you transform the two primitives into a sum of primitive with the same center P_center_tmp and gaussian exponent p_tmp +! call give_explicit_cpoly_and_cgaussian(P_new_tmp,P_center_tmp,p_tmp,fact_k_tmp,iorder,alpha,beta,a,b,A_center,B_center,dim) +! ! Then you create the new gaussian from the product of the new one per the Nuclei one +! call cgaussian_product(p_tmp,P_center_tmp,gama,Nucl_center,fact_k_bis,p,P_center) +! fact_k = fact_k_bis * fact_k_tmp +! +! ! Then you build the coefficient of the new polynom +! do i = 0, iorder(1) +! P_new(i,1) = 0.d0 +! do j = i,iorder(1) +! P_new(i,1) = P_new(i,1) + P_new_tmp(j,1) * binom_func(j,j-i) * (P_center(1) - P_center_tmp(1))**(j-i) +! enddo +! enddo +! do i = 0, iorder(2) +! P_new(i,2) = 0.d0 +! do j = i,iorder(2) +! P_new(i,2) = P_new(i,2) + P_new_tmp(j,2) * binom_func(j,j-i) * (P_center(2) - P_center_tmp(2))**(j-i) +! enddo +! enddo +! do i = 0, iorder(3) +! P_new(i,3) = 0.d0 +! do j = i,iorder(3) +! P_new(i,3) = P_new(i,3) + P_new_tmp(j,3) * binom_func(j,j-i) * (P_center(3) - P_center_tmp(3))**(j-i) +! enddo +! enddo +! +!end + +! --- + +subroutine cgaussian_product(a, xa, b, xb, k, p, xp) + + BEGIN_DOC + ! complex Gaussian product + ! e^{-a (r-r_A)^2} e^{-b (r-r_B)^2} = k e^{-p (r-r_P)^2} + END_DOC + + implicit none + complex*16, intent(in) :: a, b, xa(3), xb(3) + complex*16, intent(out) :: p, k, xp(3) + + double precision :: tmp_mod + complex*16 :: p_inv, xab(3), ab + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + + ASSERT (REAL(a) > 0.) + ASSERT (REAL(b) > 0.) + + ! new exponent + p = a + b + + xab(1) = xa(1) - xb(1) + xab(2) = xa(2) - xb(2) + xab(3) = xa(3) - xb(3) + + p_inv = (1.d0, 0.d0) / p + ab = a * b * p_inv + + k = ab * (xab(1)*xab(1) + xab(2)*xab(2) + xab(3)*xab(3)) + tmp_mod = dsqrt(REAL(k)*REAL(k) + AIMAG(k)*AIMAG(k)) + if(tmp_mod .gt. 40.d0) then + k = (0.d0, 0.d0) + xp(1:3) = (0.d0, 0.d0) + return + endif + + k = zexp(-k) + xp(1) = ( a * xa(1) + b * xb(1) ) * p_inv + xp(2) = ( a * xa(2) + b * xb(2) ) * p_inv + xp(3) = ( a * xa(3) + b * xb(3) ) * p_inv + +end subroutine cgaussian_product + +! --- + +subroutine cgaussian_product_x(a, xa, b, xb, k, p, xp) + + BEGIN_DOC + ! complex Gaussian product in 1D. + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K e^{-p (x-x_P)^2} + END_DOC + + implicit none + complex*16, intent(in) :: a, b, xa, xb + complex*16, intent(out) :: p, k, xp + + double precision :: tmp_mod + complex*16 :: p_inv + complex*16 :: xab, ab + + ASSERT (REAL(a) > 0.) + ASSERT (REAL(b) > 0.) + + ! new center + p = a + b + + xab = xa - xb + + p_inv = (1.d0, 0.d0) / p + ab = a * b * p_inv + + k = ab * xab*xab + tmp_mod = dsqrt(REAL(k)*REAL(k) + AIMAG(k)*AIMAG(k)) + if(tmp_mod > 40.d0) then + k = (0.d0, 0.d0) + xp = (0.d0, 0.d0) + return + endif + + k = zexp(-k) + xp = (a*xa + b*xb) * p_inv + +end subroutine cgaussian_product_x + +! --- + +subroutine multiply_cpoly(b, nb, c, nc, d, nd) + + BEGIN_DOC + ! Multiply two complex polynomials + ! D(t) += B(t) * C(t) + END_DOC + + implicit none + + integer, intent(in) :: nb, nc + complex*16, intent(in) :: b(0:nb), c(0:nc) + complex*16, intent(inout) :: d(0:nb+nc) + integer, intent(out) :: nd + + integer :: ndtmp, ib, ic + double precision :: tmp_mod + complex*16 :: tmp + + if(ior(nc, nb) >= 0) then ! True if nc>=0 and nb>=0 + continue + else + return + endif + + ndtmp = nb + nc + + do ic = 0, nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do ib = 1, nb + d(ib) = d(ib) + c(0) * b(ib) + do ic = 1, nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = ndtmp, 0, -1 + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(tmp_mod .lt. 1.d-15) cycle + exit + enddo + +end subroutine multiply_cpoly + +! --- + +subroutine add_cpoly(b, nb, c, nc, d, nd) + + BEGIN_DOC + ! Add two complex polynomials + ! D(t) += B(t) + C(t) + END_DOC + + implicit none + complex*16, intent(in) :: b(0:nb), c(0:nc) + integer, intent(inout) :: nb, nc + integer, intent(out) :: nd + complex*16, intent(out) :: d(0:nb+nc) + + integer :: ib + double precision :: tmp_mod + complex*16 :: tmp + + nd = nb + nc + do ib = 0, max(nb, nc) + d(ib) = d(ib) + c(ib) + b(ib) + enddo + + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + do while( (tmp_mod .lt. 1.d-15) .and. (nd >= 0) ) + nd -= 1 + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(nd < 0) exit + enddo + +end subroutine add_cpoly + +! --- + +subroutine add_cpoly_multiply(b, nb, cst, d, nd) + + BEGIN_DOC + ! Add a complex polynomial multiplied by a complex constant + ! D(t) += cst * B(t) + END_DOC + + implicit none + + integer, intent(in) :: nb + complex*16, intent(in) :: b(0:nb), cst + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max(nb, nd)) + + integer :: ib + double precision :: tmp_mod + complex*16 :: tmp + + nd = max(nd, nb) + if(nd /= -1) then + + do ib = 0, nb + d(ib) = d(ib) + cst * b(ib) + enddo + + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + do while(tmp_mod .lt. 1.d-15) + nd -= 1 + if(nd < 0) exit + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + enddo + + endif + +end subroutine add_cpoly_multiply + +! --- + +subroutine recentered_cpoly2(P_A, x_A, x_P, a, P_B, x_B, x_Q, b) + + BEGIN_DOC + ! + ! write two complex polynomials (x-x_A)^a (x-x_B)^b + ! as P_A(x-x_P) and P_B(x-x_Q) + ! + END_DOC + + implicit none + + integer, intent(in) :: a, b + complex*16, intent(in) :: x_A, x_P, x_B, x_Q + complex*16, intent(out) :: P_A(0:a), P_B(0:b) + + integer :: i, minab, maxab + complex*16 :: pows_a(-2:a+b+4), pows_b(-2:a+b+4) + + double precision :: binom_func + + if((a<0) .or. (b<0)) return + + maxab = max(a, b) + minab = max(min(a, b), 0) + + pows_a(0) = (1.d0, 0.d0) + pows_a(1) = x_P - x_A + + pows_b(0) = (1.d0, 0.d0) + pows_b(1) = x_Q - x_B + + do i = 2, maxab + pows_a(i) = pows_a(i-1) * pows_a(1) + pows_b(i) = pows_b(i-1) * pows_b(1) + enddo + + P_A(0) = pows_a(a) + P_B(0) = pows_b(b) + + do i = 1, min(minab, 20) + P_A(i) = binom_transp(a-i,a) * pows_a(a-i) + P_B(i) = binom_transp(b-i,b) * pows_b(b-i) + enddo + + do i = minab+1, min(a, 20) + P_A(i) = binom_transp(a-i,a) * pows_a(a-i) + enddo + do i = minab+1, min(b, 20) + P_B(i) = binom_transp(b-i,b) * pows_b(b-i) + enddo + + do i = 101, a + P_A(i) = binom_func(a,a-i) * pows_a(a-i) + enddo + do i = 101, b + P_B(i) = binom_func(b,b-i) * pows_b(b-i) + enddo + +end subroutine recentered_cpoly2 + +! --- + +complex*16 function Fc_integral(n, inv_sq_p) + + BEGIN_DOC + ! function that calculates the following integral + ! \int_{\-infty}^{+\infty} x^n \exp(-p x^2) dx + ! for complex valued p + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n + complex*16, intent(in) :: inv_sq_p + + ! (n)! + double precision :: fact + + if(n < 0) then + Fc_integral = (0.d0, 0.d0) + return + endif + + ! odd n + if(iand(n, 1) .ne. 0) then + Fc_integral = (0.d0, 0.d0) + return + endif + + if(n == 0) then + Fc_integral = sqpi * inv_sq_p + return + endif + + Fc_integral = sqpi * 0.5d0**n * inv_sq_p**dble(n+1) * fact(n) / fact(shiftr(n, 1)) + +end function Fc_integral + +! --- + +complex*16 function crint(n, rho) + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n + complex*16, intent(in) :: rho + + integer :: i, mmax + double precision :: rho_mod, rho_re, rho_im + double precision :: sq_rho_re, sq_rho_im + double precision :: n_tmp + complex*16 :: sq_rho, rho_inv, rho_exp + + complex*16 :: crint_smallz, cpx_erf + + rho_re = REAL (rho) + rho_im = AIMAG(rho) + rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + + if(rho_mod < 10.d0) then + ! small z + + if(rho_mod .lt. 1.d-10) then + crint = 1.d0 / dble(n + n + 1) + else + crint = crint_smallz(n, rho) + endif + + else + ! large z + + if(rho_mod .gt. 40.d0) then + + n_tmp = dble(n) + 0.5d0 + crint = 0.5d0 * gamma(n_tmp) / (rho**n_tmp) + + else + + ! get \sqrt(rho) + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + + rho_exp = 0.5d0 * zexp(-rho) + rho_inv = (1.d0, 0.d0) / rho + + crint = 0.5d0 * sqpi * cpx_erf(sq_rho_re, sq_rho_im) / sq_rho + mmax = n + if(mmax .gt. 0) then + do i = 0, mmax-1 + crint = ((dble(i) + 0.5d0) * crint - rho_exp) * rho_inv + enddo + endif + + ! *** + + endif + + endif + +! print *, n, real(rho), real(crint) + +end function crint + +! --- + +complex*16 function crint_sum(n_pt_out, rho, d1) + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n_pt_out + complex*16, intent(in) :: rho, d1(0:n_pt_out) + + integer :: n, i, mmax + double precision :: rho_mod, rho_re, rho_im + double precision :: sq_rho_re, sq_rho_im + complex*16 :: sq_rho, F0 + complex*16 :: rho_tmp, rho_inv, rho_exp + complex*16, allocatable :: Fm(:) + + complex*16 :: crint_smallz, cpx_erf + + rho_re = REAL (rho) + rho_im = AIMAG(rho) + rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + + if(rho_mod < 10.d0) then + ! small z + + if(rho_mod .lt. 1.d-10) then + +! print *, ' 111' +! print *, ' rho = ', rho + + crint_sum = d1(0) +! print *, 0, 1 + + do i = 2, n_pt_out, 2 + + n = shiftr(i, 1) + crint_sum = crint_sum + d1(i) / dble(n+n+1) + +! print *, n, 1.d0 / dble(n+n+1) + enddo + + ! *** + + else + +! print *, ' 222' +! print *, ' rho = ', real(rho) +! if(abs(aimag(rho)) .gt. 1d-15) then +! print *, ' complex rho', rho +! stop +! endif + + crint_sum = d1(0) * crint_smallz(0, rho) + +! print *, 0, real(d1(0)), real(crint_smallz(0, rho)) +! if(abs(aimag(d1(0))) .gt. 1d-15) then +! print *, ' complex d1(0)', d1(0) +! stop +! endif + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + crint_sum = crint_sum + d1(i) * crint_smallz(n, rho) + +! print *, n, real(d1(i)), real(crint_smallz(n, rho)) +! if(abs(aimag(d1(i))) .gt. 1d-15) then +! print *, ' complex d1(i)', i, d1(i) +! stop +! endif + + enddo + +! print *, 'sum = ', real(crint_sum) +! if(abs(aimag(crint_sum)) .gt. 1d-15) then +! print *, ' complex crint_sum', crint_sum +! stop +! endif + + ! *** + + endif + + else + ! large z + + if(rho_mod .gt. 40.d0) then + +! print *, ' 333' +! print *, ' rho = ', rho + + rho_inv = (1.d0, 0.d0) / rho + rho_tmp = 0.5d0 * sqpi * zsqrt(rho_inv) + crint_sum = rho_tmp * d1(0) +! print *, 0, rho_tmp + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + rho_tmp = rho_tmp * (dble(n) + 0.5d0) * rho_inv + crint_sum = crint_sum + rho_tmp * d1(i) +! print *, n, rho_tmp + enddo + + ! *** + + else + +! print *, ' 444' +! print *, ' rho = ', rho + + ! get \sqrt(rho) + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + !sq_rho = zsqrt(rho) + + + F0 = 0.5d0 * sqpi * cpx_erf(sq_rho_re, sq_rho_im) / sq_rho + crint_sum = F0 * d1(0) +! print *, 0, F0 + + rho_exp = 0.5d0 * zexp(-rho) + rho_inv = (1.d0, 0.d0) / rho + + mmax = shiftr(n_pt_out, 1) + if(mmax .gt. 0) then + + allocate( Fm(mmax) ) + Fm(1:mmax) = (0.d0, 0.d0) + + do n = 0, mmax-1 + F0 = ((dble(n) + 0.5d0) * F0 - rho_exp) * rho_inv + Fm(n+1) = F0 +! print *, n, F0 + enddo + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + crint_sum = crint_sum + Fm(n) * d1(i) + enddo + deallocate(Fm) + endif + + ! *** + + endif + + endif + +end function crint_sum + +! --- + +complex*16 function crint_smallz(n, rho) + + BEGIN_DOC + ! Standard version of rint + END_DOC + + implicit none + integer, intent(in) :: n + complex*16, intent(in) :: rho + + integer, parameter :: kmax = 40 + double precision, parameter :: eps = 1.d-13 + + integer :: k + double precision :: delta_mod + complex*16 :: rho_k, ct, delta_k + + ct = 0.5d0 * zexp(-rho) * gamma(dble(n) + 0.5d0) + rho_k = (1.d0, 0.d0) + crint_smallz = ct * rho_k / gamma(dble(n) + 1.5d0) + + do k = 1, kmax + + rho_k = rho_k * rho + delta_k = ct * rho_k / gamma(dble(n+k) + 1.5d0) + crint_smallz = crint_smallz + delta_k + + delta_mod = dsqrt(REAL(delta_k)*REAL(delta_k) + AIMAG(delta_k)*AIMAG(delta_k)) + if(delta_mod .lt. eps) return + enddo + + if(delta_mod > eps) then + write(*,*) ' pb in crint_smallz !' + write(*,*) ' n, rho = ', n, rho + write(*,*) ' delta_mod = ', delta_mod + stop 1 + endif + +end function crint_smallz + +! --- + diff --git a/src/utils/cpx_erf.irp.f b/src/utils/cpx_erf.irp.f new file mode 100644 index 00000000..61f81055 --- /dev/null +++ b/src/utils/cpx_erf.irp.f @@ -0,0 +1,204 @@ + +! --- + +complex*16 function cpx_erf(x, y) + + BEGIN_DOC + ! + ! compute erf(z) for z = x + i y + ! + ! REF: Abramowitz and Stegun + ! + END_DOC + + implicit none + + double precision, intent(in) :: x, y + + double precision :: yabs + complex*16 :: erf_tmp1, erf_tmp2, erf_tmp3, erf_tot + + double precision :: erf_F + complex*16 :: erf_E, erf_G, erf_H + + yabs = dabs(y) + + if(yabs .lt. 1.d-15) then + + cpx_erf = (1.d0, 0.d0) * derf(x) + return + + else + + erf_tmp1 = (1.d0, 0.d0) * derf(x) + erf_tmp2 = erf_E(x, yabs) + erf_F(x, yabs) + erf_tmp3 = zexp(-(0.d0, 2.d0) * x * yabs) * ( erf_G(x, yabs) + erf_H(x, yabs) ) + erf_tot = erf_tmp1 + erf_tmp2 - erf_tmp3 + + endif + + if(y .gt. 0.d0) then + cpx_erf = erf_tot + else + cpx_erf = CONJG(erf_tot) + endif + +end function cpx_erf + +! --- + +complex*16 function erf_E(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + if( (dabs(x).gt.6.d0) .or. (x==0.d0) ) then + erf_E = (0.d0, 0.d0) + return + endif + + if(dabs(x) .lt. 1.d-7) then + + erf_E = -inv_pi * (0.d0, 1.d0) * yabs + + else + + erf_E = 0.5d0 * inv_pi * dexp(-x*x) & + * ((1.d0, 0.d0) - zexp(-(2.d0, 0.d0) * x * yabs)) / x + + endif + +end function erf_E + +! --- + +double precision function erf_F(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i + double precision :: tmp1, tmp2, x2, ct + + + if(dabs(x) .gt. 5.8d0) then + + erf_F = 0.d0 + + else + + x2 = x * x + ct = x * inv_pi + + erf_F = 0.d0 + do i = 1, Nmax + + tmp1 = 0.25d0 * dble(i) * dble(i) + x2 + tmp2 = dexp(-tmp1) / tmp1 + erf_F = erf_F + tmp2 + + if(dabs(tmp2) .lt. 1d-15) exit + enddo + erf_F = ct * erf_F + + endif + +end function erf_F + +! --- + +complex*16 function erf_G(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i, tmpi, imin, imax + double precision :: tmp0, tmp1, x2, idble + complex*16 :: tmp2 + + if(x .eq. 0.d0) then + erf_G = (0.d0, 0.d0) + return + endif + + tmpi = int(2.d0 * yabs) + imin = max(1, tmpi-Nmax) + imax = tmpi + Nmax + + x2 = x * x + + erf_G = 0.d0 + do i = imin, imax + + idble = dble(i) + tmp0 = 0.5d0 * idble + tmp1 = tmp0 * tmp0 + x2 + tmp2 = dexp( idble * yabs - tmp1 - dlog(tmp1) - dlog_2pi) * (x - (0.d0, 1.d0)*tmp0) + + erf_G = erf_G + tmp2 + + enddo + +end function erf_G + +! --- + +complex*16 function erf_H(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i + double precision :: tmp0, tmp1, tmp_mod, x2, ct, idble + complex*16 :: tmp2 + + if(x .eq. 0.d0) then + erf_H = (0.d0, 0.d0) + return + endif + + + if( (dabs(x) .lt. 10d0) .and. (yabs .lt. 6.1d0) ) then + + x2 = x * x + ct = 0.5d0 * inv_pi + + erf_H = 0.d0 + do i = 1, Nmax + + idble = dble(i) + tmp0 = 0.5d0 * idble + tmp1 = tmp0 * tmp0 + x2 + tmp2 = dexp(-tmp1-idble*yabs) * (x + (0.d0, 1.d0)*tmp0) / tmp1 + erf_H = erf_H + tmp2 + + tmp_mod = dsqrt(REAL(tmp2)*REAL(tmp2) + AIMAG(tmp2)*AIMAG(tmp2)) + if(tmp_mod .lt. 1d-15) exit + enddo + erf_H = ct * erf_H + + else + + erf_H = (0.d0, 0.d0) + + endif + +end function erf_H + +! --- + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 15d79622..ff17ee4e 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -133,7 +133,7 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, io BEGIN_DOC ! Transforms the product of - ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) ! into ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) @@ -427,6 +427,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) end subroutine +!- + +subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D with multiple xa + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + END_DOC + + integer, intent(in) :: n_points + double precision , intent(in) :: a,b ! Exponents + double precision , intent(in) :: xa(n_points),xb ! Centers + double precision , intent(out) :: p(n_points) ! New exponent + double precision , intent(out) :: xp(n_points) ! New center + double precision , intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + integer :: ipoint + + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab, ab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + do ipoint = 1, n_points + xab = xa(ipoint)-xb + k(ipoint) = ab*xab*xab + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + cycle + endif + k(ipoint) = exp(-k(ipoint)) + xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv + enddo +end subroutine + + @@ -506,8 +546,10 @@ subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) enddo enddo enddo + end + subroutine add_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -1041,3 +1083,94 @@ double precision function rint1(n,rho) write(*,*)'pb in rint1 k too large!' stop 1 end + +! --- + +double precision function V_phi(n, m) + + BEGIN_DOC + ! Computes the angular $\phi$ part of the nuclear attraction integral: + ! + ! $\int_{0}^{2 \pi} \cos(\phi)^n \sin(\phi)^m d\phi$. + END_DOC + + implicit none + integer, intent(in) :: n, m + + integer :: i + double precision :: prod + + double precision :: Wallis + + prod = 1.d0 + do i = 0, shiftr(n, 1)-1 + prod = prod/ (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) + enddo + V_phi = 4.d0 * prod * Wallis(m) + +end function V_phi + +! --- + +double precision function V_theta(n, m) + + BEGIN_DOC + ! Computes the angular $\theta$ part of the nuclear attraction integral: + ! + ! $\int_{0}^{\pi} \cos(\theta)^n \sin(\theta)^m d\theta$ + END_DOC + + implicit none + include 'utils/constants.include.F' + integer, intent(in) :: n, m + + integer :: i + double precision :: prod + + double precision :: Wallis + + V_theta = 0.d0 + prod = 1.d0 + do i = 0, shiftr(n, 1)-1 + prod = prod / (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) + enddo + V_theta = (prod + prod) * Wallis(m) + +end function V_theta + +! --- + +double precision function Wallis(n) + + BEGIN_DOC + ! Wallis integral: + ! + ! $\int_{0}^{\pi} \cos(\theta)^n d\theta$. + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n + + integer :: p + + double precision :: fact + + if(iand(n, 1) .eq. 0) then + + Wallis = fact(shiftr(n, 1)) + Wallis = pi * fact(n) / (dble(ibset(0_8, n)) * (Wallis + Wallis) * Wallis) + + else + + p = shiftr(n, 1) + Wallis = fact(p) + Wallis = dble(ibset(0_8, p+p)) * Wallis * Wallis / fact(p+p+1) + + endif + +end function Wallis + +! --- + diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 081adee3..c797c87e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -32,9 +32,8 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_ end -subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim) - implicit none +subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim) + BEGIN_DOC !.. math:: ! @@ -42,7 +41,10 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' + + implicit none integer,intent(in) :: dim ! dimension maximum for the arrays representing the polynomials double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta @@ -51,17 +53,18 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p double precision :: F_integral_tab(0:max_dim) integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif integer :: nmax double precision :: F_integral + + call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + nmax = maxval(iorder_p) do i = 0,nmax F_integral_tab(i) = F_integral(i,p) @@ -93,40 +96,47 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& end +! --- + +subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx) -subroutine overlap_x_abs(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,lower_exp_val,dx,nx) - implicit none BEGIN_DOC ! .. math :: ! ! \int_{-infty}^{+infty} (x-A_center)^(power_A) * (x-B_center)^power_B * exp(-alpha(x-A_center)^2) * exp(-beta(x-B_center)^2) dx ! END_DOC - integer :: i,j,k,l - integer,intent(in) :: power_A,power_B - double precision, intent(in) :: lower_exp_val - double precision,intent(in) :: A_center, B_center,alpha,beta - double precision, intent(out) :: overlap_x,dx - integer, intent(in) :: nx - double precision :: x_min,x_max,domain,x,factor,dist,p,p_inv,rho - double precision :: P_center - if(power_A.lt.0.or.power_B.lt.0)then + + implicit none + + integer, intent(in) :: power_A, power_B, nx + double precision, intent(in) :: lower_exp_val, A_center, B_center, alpha, beta + double precision, intent(out) :: overlap_x, dx + + integer :: i, j, k, l + double precision :: x_min, x_max, domain, x, factor, dist, p, p_inv, rho + double precision :: P_center + double precision :: tmp + + if(power_A.lt.0 .or. power_B.lt.0) then overlap_x = 0.d0 dx = 0.d0 return endif - p = alpha + beta - p_inv= 1.d0/p - rho = alpha * beta * p_inv - dist = (A_center - B_center)*(A_center - B_center) + + p = alpha + beta + p_inv = 1.d0/p + rho = alpha * beta * p_inv + dist = (A_center - B_center)*(A_center - B_center) P_center = (alpha * A_center + beta * B_center) * p_inv - if(rho*dist.gt.80.d0)then + + if(rho*dist.gt.80.d0) then overlap_x= 0.d0 return endif + factor = dexp(-rho * dist) - double precision :: tmp tmp = dsqrt(lower_exp_val/p) x_min = P_center - tmp @@ -143,7 +153,7 @@ subroutine overlap_x_abs(A_center,B_center,alpha,beta,power_A,power_B,overlap_x, overlap_x = factor * dx * overlap_x end - +! --- subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points) @@ -173,7 +183,7 @@ subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, powe double precision :: F_integral double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:) - ldp = maxval(power_A(1:3) + power_B(1:3)) + ldp = maxval( power_A(1:3) + power_B(1:3) ) allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points)) diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 41e7cad6..aba99c2b 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -460,6 +460,33 @@ subroutine v2_over_x(v,x,res) end +! --- + +subroutine check_sym(A, n) + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer :: i, j + double precision :: dev_sym, norm, tmp + + dev_sym = 0.d0 + norm = 0.d0 + do i = 1, n + do j = i+1, n + tmp = A(j,i) - A(i,j) + dev_sym += tmp * tmp + norm += A(j,i) * A(j,i) + enddo + enddo + + print*, ' deviation from sym = ', dev_sym + print*, ' norm = ', norm + +end subroutine check_sym + +! --- + subroutine sum_A_At(A, N) !BEGIN_DOC From f475446d9dfb9294cb916053b8a6a0d1f52149f7 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 14 Apr 2023 16:35:06 +0200 Subject: [PATCH 02/29] Improved qp command --- bin/qp_test | 3 ++- etc/qp.rc | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bin/qp_test b/bin/qp_test index 288b7291..d3a188fb 100755 --- a/bin/qp_test +++ b/bin/qp_test @@ -46,7 +46,7 @@ def main(arguments): append_bats(dirname, filenames) else: for (dirname, _, filenames) in os.walk(os.getcwd(), followlinks=False): - if "IRPF90_temp" not in dirname: + if "IRPF90_temp" not in dirname and "external" not in dirname: append_bats(dirname, filenames) l_bats = [y for _, y in sorted(l_bats)] @@ -67,6 +67,7 @@ def main(arguments): os.system(test+" python3 bats_to_sh.py "+bats_file+ "| bash") else: +# print(" ".join(["bats", "--verbose-run", "--trace", bats_file])) subprocess.check_call(["bats", "--verbose-run", "--trace", bats_file], env=os.environ) diff --git a/etc/qp.rc b/etc/qp.rc index c56661c7..d339f475 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -110,6 +110,11 @@ function qp() unset COMMAND ;; + "test") + shift + qp_test $@ + ;; + *) which "qp_$1" &> /dev/null if [[ $? -eq 0 ]] ; then @@ -183,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in * ; do [[ -f ${i}/ezfio/.version ]] && echo $i ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) @@ -215,10 +220,15 @@ _qp_Complete() return 0 ;; esac;; + test) + COMPREPLY=( $(compgen -W "-v -a " -- $cur ) ) + return 0 + ;; *) COMPREPLY=( $(compgen -W 'plugins set_file \ unset_file man \ create_ezfio \ + test \ convert_output_to_ezfio \ -h update' -- $cur ) ) From 79c9d91d1991b7f2561e089f00e808c5c8ceb881 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 11:20:36 +0200 Subject: [PATCH 03/29] missing script ccsd --- src/ccsd/org/TANGLE_org_mode.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 src/ccsd/org/TANGLE_org_mode.sh diff --git a/src/ccsd/org/TANGLE_org_mode.sh b/src/ccsd/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/ccsd/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done From 0325e59ebef7fa8c0c6757d126215ee28db3021d Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 11:22:04 +0200 Subject: [PATCH 04/29] remove old utils_trust_region --- src/utils_trust_region/EZFIO.cfg | 89 - src/utils_trust_region/NEED | 1 - src/utils_trust_region/README.rst | 5 - src/utils_trust_region/TANGLE_org_mode.sh | 7 - src/utils_trust_region/algo_trust.irp.f | 248 --- src/utils_trust_region/algo_trust.org | 593 ------ .../apply_mo_rotation.irp.f | 85 - src/utils_trust_region/apply_mo_rotation.org | 86 - src/utils_trust_region/mat_to_vec_index.irp.f | 61 - src/utils_trust_region/mat_to_vec_index.org | 63 - src/utils_trust_region/pi.h | 2 - src/utils_trust_region/rotation_matrix.irp.f | 443 ----- src/utils_trust_region/rotation_matrix.org | 454 ----- .../sub_to_full_rotation_matrix.irp.f | 64 - .../sub_to_full_rotation_matrix.org | 65 - .../trust_region_expected_e.irp.f | 119 -- .../trust_region_expected_e.org | 121 -- .../trust_region_optimal_lambda.irp.f | 1655 ---------------- .../trust_region_optimal_lambda.org | 1665 ----------------- src/utils_trust_region/trust_region_rho.irp.f | 121 -- src/utils_trust_region/trust_region_rho.org | 123 -- .../trust_region_step.irp.f | 716 ------- src/utils_trust_region/trust_region_step.org | 726 ------- src/utils_trust_region/vec_to_mat_index.irp.f | 71 - src/utils_trust_region/vec_to_mat_index.org | 72 - src/utils_trust_region/vec_to_mat_v2.irp.f | 39 - src/utils_trust_region/vec_to_mat_v2.org | 40 - 27 files changed, 7734 deletions(-) delete mode 100644 src/utils_trust_region/EZFIO.cfg delete mode 100644 src/utils_trust_region/NEED delete mode 100644 src/utils_trust_region/README.rst delete mode 100755 src/utils_trust_region/TANGLE_org_mode.sh delete mode 100644 src/utils_trust_region/algo_trust.irp.f delete mode 100644 src/utils_trust_region/algo_trust.org delete mode 100644 src/utils_trust_region/apply_mo_rotation.irp.f delete mode 100644 src/utils_trust_region/apply_mo_rotation.org delete mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f delete mode 100644 src/utils_trust_region/mat_to_vec_index.org delete mode 100644 src/utils_trust_region/pi.h delete mode 100644 src/utils_trust_region/rotation_matrix.irp.f delete mode 100644 src/utils_trust_region/rotation_matrix.org delete mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f delete mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.org delete mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f delete mode 100644 src/utils_trust_region/trust_region_expected_e.org delete mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f delete mode 100644 src/utils_trust_region/trust_region_optimal_lambda.org delete mode 100644 src/utils_trust_region/trust_region_rho.irp.f delete mode 100644 src/utils_trust_region/trust_region_rho.org delete mode 100644 src/utils_trust_region/trust_region_step.irp.f delete mode 100644 src/utils_trust_region/trust_region_step.org delete mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f delete mode 100644 src/utils_trust_region/vec_to_mat_index.org delete mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f delete mode 100644 src/utils_trust_region/vec_to_mat_v2.org diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg deleted file mode 100644 index 9c9f6248..00000000 --- a/src/utils_trust_region/EZFIO.cfg +++ /dev/null @@ -1,89 +0,0 @@ -[thresh_delta] -type: double precision -doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta -interface: ezfio,provider,ocaml -default: 1.e-10 - -[thresh_rho] -type: double precision -doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) -interface: ezfio,provider,ocaml -default: 0.1 - -[thresh_eig] -type: double precision -doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm -interface: ezfio,provider,ocaml -default: 1.e-12 - -[thresh_model] -type: double precision -doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm -interface: ezfio,provider,ocaml -default: 1.e-12 - -[absolute_eig] -type: logical -doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) -interface: ezfio,provider,ocaml -default: false - -[thresh_wtg] -type: double precision -doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small -interface: ezfio,provider,ocaml -default: 1.e-6 - -[thresh_wtg2] -type: double precision -doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day -interface: ezfio,provider,ocaml -default: 1.e-6 - -[avoid_saddle] -type: logical -doc: Test to avoid saddle point, active if true -interface: ezfio,provider,ocaml -default: false - -[version_avoid_saddle] -type: integer -doc: cf. trust region, not stable -interface: ezfio,provider,ocaml -default: 3 - -[thresh_rho_2] -type: double precision -doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected -interface: ezfio,provider,ocaml -default: 0.1 - -[thresh_cc] -type: double precision -doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) -interface: ezfio,provider,ocaml -default: 1.e-6 - -[thresh_model_2] -type: double precision -doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning -interface: ezfio,provider,ocaml -default: 1.e-12 - -[version_lambda_search] -type: integer -doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 -interface: ezfio,provider,ocaml -default: 2 - -[nb_it_max_lambda] -type: integer -doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm -interface: ezfio,provider,ocaml -default: 100 - -[nb_it_max_pre_search] -type: integer -doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm -interface: ezfio,provider,ocaml -default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED deleted file mode 100644 index 1a65ce38..00000000 --- a/src/utils_trust_region/NEED +++ /dev/null @@ -1 +0,0 @@ -hartree_fock diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst deleted file mode 100644 index 6a0689b6..00000000 --- a/src/utils_trust_region/README.rst +++ /dev/null @@ -1,5 +0,0 @@ -============ -trust_region -============ - -The documentation can be found in the org files. diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh deleted file mode 100755 index 059cbe7d..00000000 --- a/src/utils_trust_region/TANGLE_org_mode.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -list='ls *.org' -for element in $list -do - emacs --batch $element -f org-babel-tangle -done diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f deleted file mode 100644 index eac17275..00000000 --- a/src/utils_trust_region/algo_trust.irp.f +++ /dev/null @@ -1,248 +0,0 @@ -! Algorithm for the trust region - -! step_in_trust_region: -! Computes the step in the trust region (delta) -! (automatically sets at the iteration 0 and which evolves during the -! process in function of the evolution of rho). The step is computing by -! constraining its norm with a lagrange multiplier. -! Since the calculation of the step is based on the Newton method, an -! estimation of the gain in energy is given using the Taylors series -! truncated at the second order (criterion_model). -! If (DABS(criterion-criterion_model) < 1d-12) then -! must_exit = .True. -! else -! must_exit = .False. - -! This estimation of the gain in energy is used by -! is_step_cancel_trust_region to say if the step is accepted or cancelled. - -! If the step must be cancelled, the calculation restart from the same -! hessian and gradient and recomputes the step but in a smaller trust -! region and so on until the step is accepted. If the step is accepted -! the hessian and the gradient are recomputed to produce a new step. - -! Example: - - -! !### Initialization ### -! delta = 0d0 -! nb_iter = 0 ! Must start at 0 !!! -! rho = 0.5d0 -! not_converged = .True. -! -! ! ### TODO ### -! ! Compute the criterion before the loop -! call #your_criterion(prev_criterion) -! -! do while (not_converged) -! ! ### TODO ## -! ! Call your gradient -! ! Call you hessian -! call #your_gradient(v_grad) (1D array) -! call #your_hessian(H) (2D array) -! -! ! ### TODO ### -! ! Diagonalization of the hessian -! call diagonalization_hessian(n,H,e_val,w) -! -! cancel_step = .True. ! To enter in the loop just after -! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho -! do while (cancel_step) -! -! ! Hessian,gradient,Criterion -> x -! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) -! -! if (must_exit) then -! ! ### Message ### -! ! if step_in_trust_region sets must_exit on true for numerical reasons -! print*,'algo_trust1 sends the message : Exit' -! !### exit ### -! endif -! -! !### TODO ### -! ! Compute x -> m_x -! ! Compute m_x -> R -! ! Apply R and keep the previous MOs... -! ! Update/touch -! ! Compute the new criterion/energy -> criterion -! -! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) -! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) -! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) -! -! TOUCH #your_variables -! -! call #your_criterion(criterion) -! -! ! Criterion -> step accepted or rejected -! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) -! -! ! ### TODO ### -! !if (cancel_step) then -! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) -! !endif -! #if (cancel_step) then -! #mo_coef = prev_mos -! #endif -! -! enddo -! -! !call save_mos() !### depend of the time for 1 iteration -! -! ! To exit the external loop if must_exit = .True. -! if (must_exit) then -! !### exit ### -! endif -! -! ! Step accepted, nb iteration + 1 -! nb_iter = nb_iter + 1 -! -! ! ### TODO ### -! !if (###Conditions###) then -! ! no_converged = .False. -! !endif -! #if (#your_conditions) then -! # not_converged = .False. -! #endif -! -! enddo - - - -! Variables: - -! Input: -! | n | integer | m*(m-1)/2 | -! | m | integer | number of mo in the mo_class | -! | H(n,n) | double precision | Hessian | -! | v_grad(n) | double precision | Gradient | -! | W(n,n) | double precision | Eigenvectors of the hessian | -! | e_val(n) | double precision | Eigenvalues of the hessian | -! | criterion | double precision | Actual criterion | -! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | -! | rho | double precision | Given by is_step_cancel_trus_region | -! | | | Agreement between the real function and the Taylor series (2nd order) | -! | nb_iter | integer | Actual number of iterations | - -! Input/output: -! | delta | double precision | Radius of the trust region | - -! Output: -! | criterion_model | double precision | Predicted criterion after the rotation | -! | x(n) | double precision | Step | -! | must_exit | logical | If the program must exit the loop | - - -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) - - include 'pi.h' - - BEGIN_DOC - ! Compute the step and the expected criterion/energy after the step - END_DOC - - implicit none - - ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) - double precision, intent(in) :: rho, prev_criterion - - ! inout - double precision, intent(inout) :: delta, e_val(n) - - ! out - double precision, intent(out) :: criterion_model, x(n) - logical, intent(out) :: must_exit - - ! internal - integer :: info - - must_exit = .False. - - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) - - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) - - ! exit if DABS(prev_criterion - criterion_model) < 1d-12 - if (DABS(prev_criterion - criterion_model) < thresh_model) then - print*,'' - print*,'###############################################################################' - print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' - print*,'###############################################################################' - print*,'' - must_exit = .True. - endif - - if (delta < thresh_delta) then - print*,'' - print*,'##############################################' - print*,'Delta <', thresh_delta, 'stop the trust region' - print*,'##############################################' - print*,'' - must_exit = .True. - endif - - ! Add after the call to this subroutine, a statement: - ! "if (must_exit) then - ! exit - ! endif" - ! in order to exit the optimization loop - -end subroutine - - - -! Variables: - -! Input: -! | nb_iter | integer | actual number of iterations | -! | prev_criterion | double precision | criterion before the application of the step x | -! | criterion | double precision | criterion after the application of the step x | -! | criterion_model | double precision | predicted criterion after the application of x | - -! Output: -! | rho | double precision | Agreement between the predicted criterion and the real new criterion | -! | cancel_step | logical | If the step must be cancelled | - - -subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) - - include 'pi.h' - - BEGIN_DOC - ! Compute if the step should be cancelled - END_DOC - - implicit none - - ! in - double precision, intent(in) :: prev_criterion, criterion, criterion_model - - ! inout - integer, intent(inout) :: nb_iter - - ! out - logical, intent(out) :: cancel_step - double precision, intent(out) :: rho - - ! Computes rho - call trust_region_rho(prev_criterion,criterion,criterion_model,rho) - - if (nb_iter == 0) then - nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled - endif - - ! If rho < thresh_rho -> give something in output to cancel the step - if (rho >= thresh_rho) then !0.1d0) then - ! The step is accepted - cancel_step = .False. - else - ! The step is rejected - cancel_step = .True. - print*, '***********************' - print*, 'Step cancel : rho <', thresh_rho - print*, '***********************' - endif - -end subroutine diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org deleted file mode 100644 index aa836f98..00000000 --- a/src/utils_trust_region/algo_trust.org +++ /dev/null @@ -1,593 +0,0 @@ -* Algorithm for the trust region - -step_in_trust_region: -Computes the step in the trust region (delta) -(automatically sets at the iteration 0 and which evolves during the -process in function of the evolution of rho). The step is computing by -constraining its norm with a lagrange multiplier. -Since the calculation of the step is based on the Newton method, an -estimation of the gain in energy is given using the Taylors series -truncated at the second order (criterion_model). -If (DABS(criterion-criterion_model) < 1d-12) then - must_exit = .True. -else - must_exit = .False. - -This estimation of the gain in energy is used by -is_step_cancel_trust_region to say if the step is accepted or cancelled. - -If the step must be cancelled, the calculation restart from the same -hessian and gradient and recomputes the step but in a smaller trust -region and so on until the step is accepted. If the step is accepted -the hessian and the gradient are recomputed to produce a new step. - -Example: - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -! !### Initialization ### -! delta = 0d0 -! nb_iter = 0 ! Must start at 0 !!! -! rho = 0.5d0 -! not_converged = .True. -! -! ! ### TODO ### -! ! Compute the criterion before the loop -! call #your_criterion(prev_criterion) -! -! do while (not_converged) -! ! ### TODO ## -! ! Call your gradient -! ! Call you hessian -! call #your_gradient(v_grad) (1D array) -! call #your_hessian(H) (2D array) -! -! ! ### TODO ### -! ! Diagonalization of the hessian -! call diagonalization_hessian(n,H,e_val,w) -! -! cancel_step = .True. ! To enter in the loop just after -! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho -! do while (cancel_step) -! -! ! Hessian,gradient,Criterion -> x -! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) -! -! if (must_exit) then -! ! ### Message ### -! ! if step_in_trust_region sets must_exit on true for numerical reasons -! print*,'algo_trust1 sends the message : Exit' -! !### exit ### -! endif -! -! !### TODO ### -! ! Compute x -> m_x -! ! Compute m_x -> R -! ! Apply R and keep the previous MOs... -! ! Update/touch -! ! Compute the new criterion/energy -> criterion -! -! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) -! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) -! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) -! -! TOUCH #your_variables -! -! call #your_criterion(criterion) -! -! ! Criterion -> step accepted or rejected -! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) -! -! ! ### TODO ### -! !if (cancel_step) then -! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) -! !endif -! #if (cancel_step) then -! #mo_coef = prev_mos -! #endif -! -! enddo -! -! !call save_mos() !### depend of the time for 1 iteration -! -! ! To exit the external loop if must_exit = .True. -! if (must_exit) then -! !### exit ### -! endif -! -! ! Step accepted, nb iteration + 1 -! nb_iter = nb_iter + 1 -! -! ! ### TODO ### -! !if (###Conditions###) then -! ! no_converged = .False. -! !endif -! #if (#your_conditions) then -! # not_converged = .False. -! #endif -! -! enddo -#+END_SRC - -Variables: - -Input: -| n | integer | m*(m-1)/2 | -| m | integer | number of mo in the mo_class | -| H(n,n) | double precision | Hessian | -| v_grad(n) | double precision | Gradient | -| W(n,n) | double precision | Eigenvectors of the hessian | -| e_val(n) | double precision | Eigenvalues of the hessian | -| criterion | double precision | Actual criterion | -| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | -| rho | double precision | Given by is_step_cancel_trus_region | -| | | Agreement between the real function and the Taylor series (2nd order) | -| nb_iter | integer | Actual number of iterations | - -Input/output: -| delta | double precision | Radius of the trust region | - -Output: -| criterion_model | double precision | Predicted criterion after the rotation | -| x(n) | double precision | Step | -| must_exit | logical | If the program must exit the loop | - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) - - include 'pi.h' - - BEGIN_DOC - ! Compute the step and the expected criterion/energy after the step - END_DOC - - implicit none - - ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) - double precision, intent(in) :: rho, prev_criterion - - ! inout - double precision, intent(inout) :: delta, e_val(n) - - ! out - double precision, intent(out) :: criterion_model, x(n) - logical, intent(out) :: must_exit - - ! internal - integer :: info - - must_exit = .False. - - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) - - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) - - ! exit if DABS(prev_criterion - criterion_model) < 1d-12 - if (DABS(prev_criterion - criterion_model) < thresh_model) then - print*,'' - print*,'###############################################################################' - print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' - print*,'###############################################################################' - print*,'' - must_exit = .True. - endif - - if (delta < thresh_delta) then - print*,'' - print*,'##############################################' - print*,'Delta <', thresh_delta, 'stop the trust region' - print*,'##############################################' - print*,'' - must_exit = .True. - endif - - ! Add after the call to this subroutine, a statement: - ! "if (must_exit) then - ! exit - ! endif" - ! in order to exit the optimization loop - -end subroutine -#+END_SRC - -Variables: - -Input: -| nb_iter | integer | actual number of iterations | -| prev_criterion | double precision | criterion before the application of the step x | -| criterion | double precision | criterion after the application of the step x | -| criterion_model | double precision | predicted criterion after the application of x | - -Output: -| rho | double precision | Agreement between the predicted criterion and the real new criterion | -| cancel_step | logical | If the step must be cancelled | - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) - - include 'pi.h' - - BEGIN_DOC - ! Compute if the step should be cancelled - END_DOC - - implicit none - - ! in - double precision, intent(in) :: prev_criterion, criterion, criterion_model - - ! inout - integer, intent(inout) :: nb_iter - - ! out - logical, intent(out) :: cancel_step - double precision, intent(out) :: rho - - ! Computes rho - call trust_region_rho(prev_criterion,criterion,criterion_model,rho) - - if (nb_iter == 0) then - nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled - endif - - ! If rho < thresh_rho -> give something in output to cancel the step - if (rho >= thresh_rho) then !0.1d0) then - ! The step is accepted - cancel_step = .False. - else - ! The step is rejected - cancel_step = .True. - print*, '***********************' - print*, 'Step cancel : rho <', thresh_rho - print*, '***********************' - endif - -end subroutine -#+END_SRC - -** Template for MOs -#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt -subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) - - implicit none - - ! Variables - - ! In - integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) - - ! Out - ! Rien ou un truc pour savoir si ça c'est bien passé - - ! Internal - double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) - double precision, allocatable :: prev_mos(:,:) - double precision :: criterion, prev_criterion, criterion_model - double precision :: delta, rho - logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation - integer :: nb_iter, info, nb_sub_iter - integer :: i,j,tmp_i,tmp_j - - allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) - allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) - allocate(prev_mos(ao_num, mo_num)) - - ! Provide the criterion, but unnecessary because it's done - ! automatically - PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! Initialization - delta = 0d0 - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must start at 0.5 - not_converged = .True. ! Must be true - - ! Compute the criterion before the loop - prev_criterion = C_PROVIDER - - do while (not_converged) - - print*,'' - print*,'******************' - print*,'Iteration', nb_iter - print*,'******************' - print*,'' - - ! The new hessian and gradient are computed at the end of the previous iteration - ! Diagonalization of the hessian - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - cancel_step = .True. ! To enter in the loop just after - nb_sub_iter = 0 - - ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - - print*,'-----------------------------' - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & - prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - if (must_exit) then - ! if step_in_trust_region sets must_exit on true for numerical reasons - print*,'trust_region_step_w_expected_e sent the message : Exit' - exit - endif - - ! 1D tmp -> 2D tmp - call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) - - ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) - call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) - - if (enforce_step_cancellation) then - print*, 'Forces the step cancellation, too large error in the rotation matrix' - rho = 0d0 - cycle - endif - - ! tmp_R to R, subspace to full space - call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) - - ! Rotation of the MOs - call apply_mo_rotation(R, prev_mos) - - ! touch mo_coef - call clear_mo_map ! Only if you are using the bi-electronic integrals - ! mo_coef becomes valid - ! And avoid the recomputation of the providers which depend of mo_coef - TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! To update the other parameters if needed - call #update_parameters() - - ! To enforce the program to provide new criterion after the update - ! of the parameters - FREE C_PROVIDER - PROVIDE C_PROVIDER - criterion = C_PROVIDER - - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) - - ! Cancellation of the step ? - if (cancel_step) then - ! Replacement by the previous MOs - mo_coef = prev_mos - ! call save_mos() ! depends of the time for 1 iteration - - ! No need to clear_mo_map since we don't recompute the gradient and the hessian - ! mo_coef becomes valid - ! Avoid the recomputation of the providers which depend of mo_coef - TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER - else - ! The step is accepted: - ! criterion -> prev criterion - - ! The replacement "criterion -> prev criterion" is already done - ! in trust_region_rho, so if the criterion does not have a reason - ! to change, it will change nothing for the criterion and will - ! force the program to provide the new hessian, gradient and - ! convergence criterion for the next iteration. - ! But in the case of orbital optimization we diagonalize the CI - ! matrix after the "FREE" statement, so the criterion will change - - FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - prev_criterion = C_PROVIDER - - endif - - nb_sub_iter = nb_sub_iter + 1 - enddo - - ! call save_mos() ! depends of the time for 1 iteration - - ! To exit the external loop if must_exit = .True. - if (must_exit) then - exit - endif - - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 - - ! Provide the convergence criterion - ! Provide the gradient and the hessian for the next iteration - PROVIDE cc_PROVIDER - - ! To exit - if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then - not_converged = .False. - endif - - if (nb_iter > optimization_max_nb_iter) then - not_converged = .False. - endif - - if (delta < thresh_delta) then - not_converged = .False. - endif - - enddo - - ! Save the final MOs - call save_mos() - - ! Diagonalization of the hessian - ! (To see the eigenvalues at the end of the optimization) - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) - -end -#+END_SRC - -** Cartesian version -#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt -subroutine algo_trust_cartesian_template(tmp_n) - - implicit none - - ! Variables - - ! In - integer, intent(in) :: tmp_n - - ! Out - ! Rien ou un truc pour savoir si ça c'est bien passé - - ! Internal - double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) - double precision :: criterion, prev_criterion, criterion_model - double precision :: delta, rho - logical :: not_converged, cancel_step, must_exit - integer :: nb_iter, nb_sub_iter - integer :: i,j - - allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) - - PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER - - ! Initialization - delta = 0d0 - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must start at 0.5 - not_converged = .True. ! Must be true - - ! Compute the criterion before the loop - prev_criterion = C_PROVIDER - - do while (not_converged) - - print*,'' - print*,'******************' - print*,'Iteration', nb_iter - print*,'******************' - print*,'' - - if (nb_iter > 0) then - PROVIDE H_PROVIDER g_PROVIDER - endif - - ! Diagonalization of the hessian - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - cancel_step = .True. ! To enter in the loop just after - nb_sub_iter = 0 - - ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - - print*,'-----------------------------' - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & - prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - if (must_exit) then - ! if step_in_trust_region sets must_exit on true for numerical reasons - print*,'trust_region_step_w_expected_e sent the message : Exit' - exit - endif - - ! New coordinates, check the sign - X_PROVIDER = X_PROVIDER - tmp_x - - ! touch X_PROVIDER - TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! To update the other parameters if needed - call #update_parameters() - - ! New criterion - PROVIDE C_PROVIDER ! Unnecessary - criterion = C_PROVIDER - - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) - - ! Cancel the previous step - if (cancel_step) then - ! Replacement by the previous coordinates, check the sign - X_PROVIDER = X_PROVIDER + tmp_x - - ! Avoid the recomputation of the hessian and the gradient - TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER - endif - - nb_sub_iter = nb_sub_iter + 1 - enddo - - ! To exit the external loop if must_exit = .True. - if (must_exit) then - exit - endif - - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 - - PROVIDE cc_PROVIDER - - ! To exit - if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then - not_converged = .False. - endif - - if (nb_iter > optimization_max_nb_iter) then - not_converged = .False. - endif - - if (delta < thresh_delta) then - not_converged = .False. - endif - - enddo - - deallocate(e_val, W, tmp_x) - -end -#+END_SRC - -** Script template -#+BEGIN_SRC bash :tangle script_template_mos.sh -#!/bin/bash - -your_file= - -your_C_PROVIDER= -your_H_PROVIDER= -your_g_PROVIDER= -your_cc_PROVIDER= - -sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file -sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file -sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file -sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file -#+END_SRC - -#+BEGIN_SRC bash :tangle script_template_xyz.sh -#!/bin/bash - -your_file= - -your_C_PROVIDER= -your_X_PROVIDER= -your_H_PROVIDER= -your_g_PROVIDER= -your_cc_PROVIDER= - -sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file -sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file -sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file -sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file -sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file -#+END_SRC - diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f deleted file mode 100644 index e274ec11..00000000 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ /dev/null @@ -1,85 +0,0 @@ -! Apply MO rotation -! Subroutine to apply the rotation matrix to the coefficients of the -! MOs. - -! New MOs = Old MOs . Rotation matrix - -! *Compute the new MOs with the previous MOs and a rotation matrix* - -! Provided: -! | mo_num | integer | number of MOs | -! | ao_num | integer | number of AOs | -! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | - -! Intent in: -! | R(mo_num,mo_num) | double precision | rotation matrix | - -! Intent out: -! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | - -! Internal: -! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | -! | i,j | integer | indexes | - -subroutine apply_mo_rotation(R,prev_mos) - - include 'pi.h' - - BEGIN_DOC - ! Compute the new MOs knowing the rotation matrix - END_DOC - - implicit none - - ! Variables - - ! in - double precision, intent(in) :: R(mo_num,mo_num) - - ! out - double precision, intent(out) :: prev_mos(ao_num,mo_num) - - ! internal - double precision, allocatable :: new_mos(:,:) - integer :: i,j - double precision :: t1,t2,t3 - - print*,'' - print*,'---apply_mo_rotation---' - - call wall_time(t1) - - ! Allocation - allocate(new_mos(ao_num,mo_num)) - - ! Calculation - - ! Product of old MOs (mo_coef) by Rotation matrix (R) - call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) - - prev_mos = mo_coef - mo_coef = new_mos - - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif - - ! Save the new MOs and change the label - mo_label = 'MCSCF' - !call save_mos - call ezfio_set_determinants_mo_label(mo_label) - - !print*,'Done, MOs saved' - - ! Deallocation, end - deallocate(new_mos) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in apply mo rotation:', t3 - print*,'---End apply_mo_rotation---' - -end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org deleted file mode 100644 index 918581b7..00000000 --- a/src/utils_trust_region/apply_mo_rotation.org +++ /dev/null @@ -1,86 +0,0 @@ -* Apply MO rotation -Subroutine to apply the rotation matrix to the coefficients of the -MOs. - -New MOs = Old MOs . Rotation matrix - -*Compute the new MOs with the previous MOs and a rotation matrix* - -Provided: -| mo_num | integer | number of MOs | -| ao_num | integer | number of AOs | -| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | - -Intent in: -| R(mo_num,mo_num) | double precision | rotation matrix | - -Intent out: -| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | - -Internal: -| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | -| i,j | integer | indexes | -#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f -subroutine apply_mo_rotation(R,prev_mos) - - include 'pi.h' - - BEGIN_DOC - ! Compute the new MOs knowing the rotation matrix - END_DOC - - implicit none - - ! Variables - - ! in - double precision, intent(in) :: R(mo_num,mo_num) - - ! out - double precision, intent(out) :: prev_mos(ao_num,mo_num) - - ! internal - double precision, allocatable :: new_mos(:,:) - integer :: i,j - double precision :: t1,t2,t3 - - print*,'' - print*,'---apply_mo_rotation---' - - call wall_time(t1) - - ! Allocation - allocate(new_mos(ao_num,mo_num)) - - ! Calculation - - ! Product of old MOs (mo_coef) by Rotation matrix (R) - call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) - - prev_mos = mo_coef - mo_coef = new_mos - - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif - - ! Save the new MOs and change the label - mo_label = 'MCSCF' - !call save_mos - call ezfio_set_determinants_mo_label(mo_label) - - !print*,'Done, MOs saved' - - ! Deallocation, end - deallocate(new_mos) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in apply mo rotation:', t3 - print*,'---End apply_mo_rotation---' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f deleted file mode 100644 index 35e12232..00000000 --- a/src/utils_trust_region/mat_to_vec_index.irp.f +++ /dev/null @@ -1,61 +0,0 @@ -! Matrix to vector index - -! *Compute the index i of a vector element from the indexes p,q of a -! matrix element* - -! Lower diagonal matrix (p,q), p > q -> vector (i) - -! If a matrix is antisymmetric it can be reshaped as a vector. And the -! vector can be reshaped as an antisymmetric matrix - -! \begin{align*} -! \begin{pmatrix} -! 0 & -1 & -2 & -4 \\ -! 1 & 0 & -3 & -5 \\ -! 2 & 3 & 0 & -6 \\ -! 4 & 5 & 6 & 0 -! \end{pmatrix} -! \Leftrightarrow -! \begin{pmatrix} -! 1 & 2 & 3 & 4 & 5 & 6 -! \end{pmatrix} -! \end{align*} - -! !!! Here the algorithm only work for the lower diagonal !!! - -! Input: -! | p,q | integer | indexes of a matrix element in the lower diagonal | -! | | | p > q, q -> column | -! | | | p -> row, | -! | | | q -> column | - -! Input: -! | i | integer | corresponding index in the vector | - - -subroutine mat_to_vec_index(p,q,i) - - include 'pi.h' - - implicit none - - ! Variables - - ! in - integer, intent(in) :: p,q - - ! out - integer, intent(out) :: i - - ! internal - integer :: a,b - double precision :: da - - ! Calculation - - a = p-1 - b = a*(a-1)/2 - - i = q+b - -end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org deleted file mode 100644 index 50840584..00000000 --- a/src/utils_trust_region/mat_to_vec_index.org +++ /dev/null @@ -1,63 +0,0 @@ -* Matrix to vector index - -*Compute the index i of a vector element from the indexes p,q of a -matrix element* - -Lower diagonal matrix (p,q), p > q -> vector (i) - -If a matrix is antisymmetric it can be reshaped as a vector. And the -vector can be reshaped as an antisymmetric matrix - -\begin{align*} -\begin{pmatrix} -0 & -1 & -2 & -4 \\ -1 & 0 & -3 & -5 \\ -2 & 3 & 0 & -6 \\ -4 & 5 & 6 & 0 -\end{pmatrix} -\Leftrightarrow -\begin{pmatrix} -1 & 2 & 3 & 4 & 5 & 6 -\end{pmatrix} -\end{align*} - -!!! Here the algorithm only work for the lower diagonal !!! - -Input: -| p,q | integer | indexes of a matrix element in the lower diagonal | -| | | p > q, q -> column | -| | | p -> row, | -| | | q -> column | - -Input: -| i | integer | corresponding index in the vector | - -#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f -subroutine mat_to_vec_index(p,q,i) - - include 'pi.h' - - implicit none - - ! Variables - - ! in - integer, intent(in) :: p,q - - ! out - integer, intent(out) :: i - - ! internal - integer :: a,b - double precision :: da - - ! Calculation - - a = p-1 - b = a*(a-1)/2 - - i = q+b - -end subroutine -#+END_SRC - diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h deleted file mode 100644 index bbfabfec..00000000 --- a/src/utils_trust_region/pi.h +++ /dev/null @@ -1,2 +0,0 @@ - !logical, parameter :: debug=.False. - double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f deleted file mode 100644 index 4738fd67..00000000 --- a/src/utils_trust_region/rotation_matrix.irp.f +++ /dev/null @@ -1,443 +0,0 @@ -! Rotation matrix - -! *Build a rotation matrix from an antisymmetric matrix* - -! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : -! $$ -! \textbf{R}=\exp(\textbf{A}) -! $$ - -! So : -! \begin{align*} -! \textbf{R}=& \exp(\textbf{A}) \\ -! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ -! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} -! \end{align*} - -! With : -! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ -! $\tau$ : $\sqrt{-x}$ -! $x$ : eigenvalues of $\textbf{A}^2$ - -! Input: -! | A(n,n) | double precision | antisymmetric matrix | -! | n | integer | number of columns of the A matrix | -! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | -! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | - -! Output: -! | R(n,n) | double precision | Rotation matrix | -! | info | integer | if info = 0, the execution is successful | -! | | | if info = k, the k-th parameter has an illegal value | -! | | | if info = -k, the algorithm failed | - -! Internal: -! | B(n,n) | double precision | B = A.A | -! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | -! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | -! | W(n,n) | double precision | eigenvectors of B | -! | e_val(n) | double precision | eigenvalues of B | -! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | -! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | -! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | -! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | -! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | -! | part_1a(n,n) | double precision | matrix cos_tau.W^t | -! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | -! | part_2a(n,n) | double precision | matrix W^t.A | -! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | -! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | -! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | -! | norm | integer | norm of R.R^t-1, must be equal to 0 | -! | i,j | integer | indexes | - -! Functions: -! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | -! | disnan | logical | Lapack function, check if an element is NaN | - - - -subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) - - implicit none - - BEGIN_DOC - ! Rotation matrix to rotate the molecular orbitals. - ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC - - include 'pi.h' - - ! Variables - - ! in - integer, intent(in) :: n,LDA,LDR - double precision, intent(inout) :: A(LDA,n) - - ! out - double precision, intent(out) :: R(LDR,n) - integer, intent(out) :: info - logical, intent(out) :: enforce_step_cancellation - - ! internal - double precision, allocatable :: B(:,:) - double precision, allocatable :: work(:,:) - double precision, allocatable :: W(:,:), e_val(:) - double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) - double precision, allocatable :: part_1(:,:),part_1a(:,:) - double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) - double precision, allocatable :: RR_t(:,:) - integer :: i,j - integer :: info2, lwork ! for dsyev - double precision :: norm, max_elem, max_elem_A, t1,t2,t3 - - ! function - double precision :: dnrm2 - logical :: disnan - - print*,'' - print*,'---rotation_matrix---' - - call wall_time(t1) - - ! Allocation - allocate(B(n,n)) - allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) - allocate(W(n,n),e_val(n)) - allocate(part_1(n,n),part_1a(n,n)) - allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) - allocate(RR_t(n,n)) - -! Pre-conditions - -! Initialization -info=0 -enforce_step_cancellation = .False. - -! Size of matrix A must be at least 1 by 1 -if (n<1) then - info = 3 - print*, 'WARNING: invalid parameter 5' - print*, 'n<1' - return -endif - -! Leading dimension of A must be >= n -if (LDA < n) then - info = 25 - print*, 'WARNING: invalid parameter 2 or 5' - print*, 'LDA < n' - return -endif - -! Leading dimension of A must be >= n -if (LDR < n) then - info = 4 - print*, 'WARNING: invalid parameter 4' - print*, 'LDR < n' - return -endif - -! Matrix elements of A must by non-NaN -do j = 1, n - do i = 1, n - if (disnan(A(i,j))) then - info=1 - print*, 'WARNING: invalid parameter 1' - print*, 'NaN element in A matrix' - return - endif - enddo -enddo - -do i = 1, n - if (A(i,i) /= 0d0) then - print*, 'WARNING: matrix A is not antisymmetric' - print*, 'Non 0 element on the diagonal', i, A(i,i) - call ABORT - endif -enddo - -do j = 1, n - do i = 1, n - if (A(i,j)+A(j,i)>1d-16) then - print*, 'WANRING: matrix A is not antisymmetric' - print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) - print*, 'diff:', A(i,j)+A(j,i) - call ABORT - endif - enddo -enddo - -! Fix for too big elements ! bad idea better to cancel if the error is too big -!do j = 1, n -! do i = 1, n -! A(i,j) = mod(A(i,j),2d0*pi) -! if (dabs(A(i,j)) > pi) then -! A(i,j) = 0d0 -! endif -! enddo -!enddo - -max_elem_A = 0d0 -do j = 1, n - do i = 1, n - if (ABS(A(i,j)) > ABS(max_elem_A)) then - max_elem_A = A(i,j) - endif - enddo -enddo -print*,'max element in A', max_elem_A - -if (ABS(max_elem_A) > 2 * pi) then - print*,'' - print*,'WARNING: ABS(max_elem_A) > 2 pi ' - print*,'' -endif - -! B=A.A -! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ -! - Diagonalization of $\textbf{B}$ -! W, the eigenvectors -! e_val, the eigenvalues - - -! Compute B=A.A - -call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) - -! Copy B in W, diagonalization will put the eigenvectors in W -W=B - -! Diagonalization of B -! Eigenvalues -> e_val -! Eigenvectors -> W -lwork = 3*n-1 -allocate(work(lwork,n)) - -print*,'Starting diagonalization ...' - -call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) - -deallocate(work) - -if (info2 == 0) then - print*, 'Diagonalization : Done' -elseif (info2 < 0) then - print*, 'WARNING: error in the diagonalization' - print*, 'Illegal value of the ', info2,'-th parameter' -else - print*, "WARNING: Diagonalization failed to converge" -endif - -! Tau^-1, cos(tau), sin(tau) -! $$\tau = \sqrt{-x}$$ -! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ -! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ -! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ -! These matrices are diagonals - -! Diagonal matrix m_diag -do j = 1, n - if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems - e_val(j) = 0.d0 - else - e_val(j) = - e_val(j) - endif -enddo - -m_diag = 0.d0 -do i = 1, n - m_diag(i,i) = e_val(i) -enddo - -! cos_tau -do j = 1, n - do i = 1, n - if (i==j) then - cos_tau(i,j) = dcos(dsqrt(e_val(i))) - else - cos_tau(i,j) = 0d0 - endif - enddo -enddo - -! sin_tau -do j = 1, n - do i = 1, n - if (i==j) then - sin_tau(i,j) = dsin(dsqrt(e_val(i))) - else - sin_tau(i,j) = 0d0 - endif - enddo -enddo - -! Debug, display the cos_tau and sin_tau matrix -!if (debug) then -! print*, 'cos_tau' -! do i = 1, n -! print*, cos_tau(i,:) -! enddo -! print*, 'sin_tau' -! do i = 1, n -! print*, sin_tau(i,:) -! enddo -!endif - -! tau^-1 -do j = 1, n - do i = 1, n - if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small - tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) - else - tau_m1(i,j) = 0d0 - endif - enddo -enddo - -max_elem = 0d0 -do i = 1, n - if (ABS(tau_m1(i,i)) > ABS(max_elem)) then - max_elem = tau_m1(i,i) - endif -enddo -print*,'max elem tau^-1:', max_elem - -! Debug -!print*,'eigenvalues:' -!do i = 1, n -! print*, e_val(i) -!enddo - -!Debug, display tau^-1 -!if (debug) then -! print*, 'tau^-1' -! do i = 1, n -! print*,tau_m1(i,:) -! enddo -!endif - -! Rotation matrix -! \begin{align*} -! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} -! \end{align*} -! \begin{align*} -! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} -! \end{align*} -! \begin{align*} -! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} -! \end{align*} - -! First: -! part_1 = dgemm(W, dgemm(cos_tau, W^t)) -! part_1a = dgemm(cos_tau, W^t) -! part_1 = dgemm(W, part_1a) -! And: -! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) -! part_2a = dgemm(W^t, A) -! part_2b = dgemm(sin_tau, part_2a) -! part_2c = dgemm(tau_m1, part_2b) -! part_2 = dgemm(W, part_2c) -! Finally: -! Rotation matrix, R = part_1+part_2 - -! If $R$ is a rotation matrix: -! $R.R^T=R^T.R=\textbf{1}$ - -! part_1 -call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) -call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) - -! part_2 -call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) -call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) -call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) -call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) - -! Rotation matrix R -R = part_1 + part_2 - -! Matrix check -! R.R^t and R^t.R must be equal to identity matrix -do j = 1, n - do i=1,n - if (i==j) then - RR_t(i,j) = 1d0 - else - RR_t(i,j) = 0d0 - endif - enddo -enddo - -call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) - -norm = dnrm2(n*n,RR_t,1) -print*, 'Rotation matrix check, norm R.R^T = ', norm - -! Debug -!if (debug) then -! print*, 'RR_t' -! do i = 1, n -! print*, RR_t(i,:) -! enddo -!endif - -! Post conditions - -! Check if R.R^T=1 -max_elem = 0d0 -do j = 1, n - do i = 1, n - if (ABS(RR_t(i,j)) > ABS(max_elem)) then - max_elem = RR_t(i,j) - endif - enddo -enddo - -print*, 'Max error in R.R^T:', max_elem -print*, 'e_val(1):', e_val(1) -print*, 'e_val(n):', e_val(n) -print*, 'max elem in A:', max_elem_A - -if (ABS(max_elem) > 1d-12) then - print*, 'WARNING: max error in R.R^T > 1d-12' - print*, 'Enforce the step cancellation' - enforce_step_cancellation = .True. -endif - -! Matrix elements of R must by non-NaN -do j = 1,n - do i = 1,LDR - if (disnan(R(i,j))) then - info = 666 - print*, 'NaN in rotation matrix' - call ABORT - endif - enddo -enddo - -! Display -!if (debug) then -! print*,'Rotation matrix :' -! do i = 1, n -! write(*,'(100(F10.5))') R(i,:) -! enddo -!endif - -! Deallocation, end - -deallocate(B) - deallocate(m_diag,cos_tau,sin_tau,tau_m1) - deallocate(W,e_val) - deallocate(part_1,part_1a) - deallocate(part_2,part_2a,part_2b,part_2c) - deallocate(RR_t) - - call wall_time(t2) - t3 = t2-t1 - print*,'Time in rotation matrix:', t3 - - print*,'---End rotation_matrix---' - -end subroutine diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org deleted file mode 100644 index 73ba0298..00000000 --- a/src/utils_trust_region/rotation_matrix.org +++ /dev/null @@ -1,454 +0,0 @@ -* Rotation matrix - -*Build a rotation matrix from an antisymmetric matrix* - -Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : -$$ -\textbf{R}=\exp(\textbf{A}) -$$ - -So : -\begin{align*} -\textbf{R}=& \exp(\textbf{A}) \\ -=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ -=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} -\end{align*} - -With : -$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ -$\tau$ : $\sqrt{-x}$ -$x$ : eigenvalues of $\textbf{A}^2$ - -Input: -| A(n,n) | double precision | antisymmetric matrix | -| n | integer | number of columns of the A matrix | -| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | -| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | - -Output: -| R(n,n) | double precision | Rotation matrix | -| info | integer | if info = 0, the execution is successful | -| | | if info = k, the k-th parameter has an illegal value | -| | | if info = -k, the algorithm failed | - -Internal: -| B(n,n) | double precision | B = A.A | -| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | -| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | -| W(n,n) | double precision | eigenvectors of B | -| e_val(n) | double precision | eigenvalues of B | -| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | -| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | -| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | -| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | -| part_1(n,n) | double precision | matrix W.cos_tau.W^t | -| part_1a(n,n) | double precision | matrix cos_tau.W^t | -| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | -| part_2a(n,n) | double precision | matrix W^t.A | -| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | -| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | -| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | -| norm | integer | norm of R.R^t-1, must be equal to 0 | -| i,j | integer | indexes | - -Functions: -| dnrm2 | double precision | Lapack function, compute the norm of a matrix | -| disnan | logical | Lapack function, check if an element is NaN | - - -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f -subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) - - implicit none - - BEGIN_DOC - ! Rotation matrix to rotate the molecular orbitals. - ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC - - include 'pi.h' - - ! Variables - - ! in - integer, intent(in) :: n,LDA,LDR - double precision, intent(inout) :: A(LDA,n) - - ! out - double precision, intent(out) :: R(LDR,n) - integer, intent(out) :: info - logical, intent(out) :: enforce_step_cancellation - - ! internal - double precision, allocatable :: B(:,:) - double precision, allocatable :: work(:,:) - double precision, allocatable :: W(:,:), e_val(:) - double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) - double precision, allocatable :: part_1(:,:),part_1a(:,:) - double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) - double precision, allocatable :: RR_t(:,:) - integer :: i,j - integer :: info2, lwork ! for dsyev - double precision :: norm, max_elem, max_elem_A, t1,t2,t3 - - ! function - double precision :: dnrm2 - logical :: disnan - - print*,'' - print*,'---rotation_matrix---' - - call wall_time(t1) - - ! Allocation - allocate(B(n,n)) - allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) - allocate(W(n,n),e_val(n)) - allocate(part_1(n,n),part_1a(n,n)) - allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) - allocate(RR_t(n,n)) -#+END_SRC - -** Pre-conditions -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Initialization - info=0 - enforce_step_cancellation = .False. - - ! Size of matrix A must be at least 1 by 1 - if (n<1) then - info = 3 - print*, 'WARNING: invalid parameter 5' - print*, 'n<1' - return - endif - - ! Leading dimension of A must be >= n - if (LDA < n) then - info = 25 - print*, 'WARNING: invalid parameter 2 or 5' - print*, 'LDA < n' - return - endif - - ! Leading dimension of A must be >= n - if (LDR < n) then - info = 4 - print*, 'WARNING: invalid parameter 4' - print*, 'LDR < n' - return - endif - - ! Matrix elements of A must by non-NaN - do j = 1, n - do i = 1, n - if (disnan(A(i,j))) then - info=1 - print*, 'WARNING: invalid parameter 1' - print*, 'NaN element in A matrix' - return - endif - enddo - enddo - - do i = 1, n - if (A(i,i) /= 0d0) then - print*, 'WARNING: matrix A is not antisymmetric' - print*, 'Non 0 element on the diagonal', i, A(i,i) - call ABORT - endif - enddo - - do j = 1, n - do i = 1, n - if (A(i,j)+A(j,i)>1d-16) then - print*, 'WANRING: matrix A is not antisymmetric' - print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) - print*, 'diff:', A(i,j)+A(j,i) - call ABORT - endif - enddo - enddo - - ! Fix for too big elements ! bad idea better to cancel if the error is too big - !do j = 1, n - ! do i = 1, n - ! A(i,j) = mod(A(i,j),2d0*pi) - ! if (dabs(A(i,j)) > pi) then - ! A(i,j) = 0d0 - ! endif - ! enddo - !enddo - - max_elem_A = 0d0 - do j = 1, n - do i = 1, n - if (ABS(A(i,j)) > ABS(max_elem_A)) then - max_elem_A = A(i,j) - endif - enddo - enddo - print*,'max element in A', max_elem_A - - if (ABS(max_elem_A) > 2 * pi) then - print*,'' - print*,'WARNING: ABS(max_elem_A) > 2 pi ' - print*,'' - endif - -#+END_SRC - -** Calculations - -*** B=A.A - - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ - - Diagonalization of $\textbf{B}$ - W, the eigenvectors - e_val, the eigenvalues - - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Compute B=A.A - - call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) - - ! Copy B in W, diagonalization will put the eigenvectors in W - W=B - - ! Diagonalization of B - ! Eigenvalues -> e_val - ! Eigenvectors -> W - lwork = 3*n-1 - allocate(work(lwork,n)) - - print*,'Starting diagonalization ...' - - call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) - - deallocate(work) - - if (info2 == 0) then - print*, 'Diagonalization : Done' - elseif (info2 < 0) then - print*, 'WARNING: error in the diagonalization' - print*, 'Illegal value of the ', info2,'-th parameter' - else - print*, "WARNING: Diagonalization failed to converge" - endif - #+END_SRC - -*** Tau^-1, cos(tau), sin(tau) - $$\tau = \sqrt{-x}$$ - - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ - - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ - - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ - These matrices are diagonals - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Diagonal matrix m_diag - do j = 1, n - if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems - e_val(j) = 0.d0 - else - e_val(j) = - e_val(j) - endif - enddo - - m_diag = 0.d0 - do i = 1, n - m_diag(i,i) = e_val(i) - enddo - - ! cos_tau - do j = 1, n - do i = 1, n - if (i==j) then - cos_tau(i,j) = dcos(dsqrt(e_val(i))) - else - cos_tau(i,j) = 0d0 - endif - enddo - enddo - - ! sin_tau - do j = 1, n - do i = 1, n - if (i==j) then - sin_tau(i,j) = dsin(dsqrt(e_val(i))) - else - sin_tau(i,j) = 0d0 - endif - enddo - enddo - - ! Debug, display the cos_tau and sin_tau matrix - !if (debug) then - ! print*, 'cos_tau' - ! do i = 1, n - ! print*, cos_tau(i,:) - ! enddo - ! print*, 'sin_tau' - ! do i = 1, n - ! print*, sin_tau(i,:) - ! enddo - !endif - - ! tau^-1 - do j = 1, n - do i = 1, n - if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small - tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) - else - tau_m1(i,j) = 0d0 - endif - enddo - enddo - - max_elem = 0d0 - do i = 1, n - if (ABS(tau_m1(i,i)) > ABS(max_elem)) then - max_elem = tau_m1(i,i) - endif - enddo - print*,'max elem tau^-1:', max_elem - - ! Debug - !print*,'eigenvalues:' - !do i = 1, n - ! print*, e_val(i) - !enddo - - !Debug, display tau^-1 - !if (debug) then - ! print*, 'tau^-1' - ! do i = 1, n - ! print*,tau_m1(i,:) - ! enddo - !endif - #+END_SRC - -*** Rotation matrix - \begin{align*} - \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} - \end{align*} - \begin{align*} - \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} - \end{align*} - \begin{align*} - \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} - \end{align*} - - First: - part_1 = dgemm(W, dgemm(cos_tau, W^t)) - part_1a = dgemm(cos_tau, W^t) - part_1 = dgemm(W, part_1a) - And: - part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) - part_2a = dgemm(W^t, A) - part_2b = dgemm(sin_tau, part_2a) - part_2c = dgemm(tau_m1, part_2b) - part_2 = dgemm(W, part_2c) - Finally: - Rotation matrix, R = part_1+part_2 - - If $R$ is a rotation matrix: - $R.R^T=R^T.R=\textbf{1}$ - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! part_1 - call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) - call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) - - ! part_2 - call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) - call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) - call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) - call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) - - ! Rotation matrix R - R = part_1 + part_2 - - ! Matrix check - ! R.R^t and R^t.R must be equal to identity matrix - do j = 1, n - do i=1,n - if (i==j) then - RR_t(i,j) = 1d0 - else - RR_t(i,j) = 0d0 - endif - enddo - enddo - - call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) - - norm = dnrm2(n*n,RR_t,1) - print*, 'Rotation matrix check, norm R.R^T = ', norm - - ! Debug - !if (debug) then - ! print*, 'RR_t' - ! do i = 1, n - ! print*, RR_t(i,:) - ! enddo - !endif - #+END_SRC - -*** Post conditions - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Check if R.R^T=1 - max_elem = 0d0 - do j = 1, n - do i = 1, n - if (ABS(RR_t(i,j)) > ABS(max_elem)) then - max_elem = RR_t(i,j) - endif - enddo - enddo - - print*, 'Max error in R.R^T:', max_elem - print*, 'e_val(1):', e_val(1) - print*, 'e_val(n):', e_val(n) - print*, 'max elem in A:', max_elem_A - - if (ABS(max_elem) > 1d-12) then - print*, 'WARNING: max error in R.R^T > 1d-12' - print*, 'Enforce the step cancellation' - enforce_step_cancellation = .True. - endif - - ! Matrix elements of R must by non-NaN - do j = 1,n - do i = 1,LDR - if (disnan(R(i,j))) then - info = 666 - print*, 'NaN in rotation matrix' - call ABORT - endif - enddo - enddo - - ! Display - !if (debug) then - ! print*,'Rotation matrix :' - ! do i = 1, n - ! write(*,'(100(F10.5))') R(i,:) - ! enddo - !endif - #+END_SRC - -** Deallocation, end - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - deallocate(B) - deallocate(m_diag,cos_tau,sin_tau,tau_m1) - deallocate(W,e_val) - deallocate(part_1,part_1a) - deallocate(part_2,part_2a,part_2b,part_2c) - deallocate(RR_t) - - call wall_time(t2) - t3 = t2-t1 - print*,'Time in rotation matrix:', t3 - - print*,'---End rotation_matrix---' - -end subroutine - #+END_SRC - diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f deleted file mode 100644 index bdd1f6ba..00000000 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f +++ /dev/null @@ -1,64 +0,0 @@ -! Rotation matrix in a subspace to rotation matrix in the full space - -! Usually, we are using a list of MOs, for exemple the active ones. When -! we compute a rotation matrix to rotate the MOs, we just compute a -! rotation matrix for these MOs in order to reduce the size of the -! matrix which has to be computed. Since the computation of a rotation -! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to -! reuce the number of MOs involved. -! After that we replace the rotation matrix in the full space by -! building the elements of the rotation matrix in the full space from -! the elements of the rotation matrix in the subspace and adding some 0 -! on the extradiagonal elements and some 1 on the diagonal elements, -! for the MOs that are not involved in the rotation. - -! Provided: -! | mo_num | integer | Number of MOs | - -! Input: -! | m | integer | Size of tmp_list, m <= mo_num | -! | tmp_list(m) | integer | List of MOs | -! | tmp_R(m,m) | double precision | Rotation matrix in the space of | -! | | | the MOs containing by tmp_list | - -! Output: -! | R(mo_num,mo_num | double precision | Rotation matrix in the space | -! | | | of all the MOs | - -! Internal: -! | i,j | integer | indexes in the full space | -! | tmp_i,tmp_j | integer | indexes in the subspace | - - -subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - - BEGIN_DOC - ! Compute the full rotation matrix from a smaller one - END_DOC - - implicit none - - ! in - integer, intent(in) :: m, tmp_list(m) - double precision, intent(in) :: tmp_R(m,m) - - ! out - double precision, intent(out) :: R(mo_num,mo_num) - - ! internal - integer :: i,j,tmp_i,tmp_j - - ! tmp_R to R, subspace to full space - R = 0d0 - do i = 1, mo_num - R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital - enddo - do tmp_j = 1, m - j = tmp_list(tmp_j) - do tmp_i = 1, m - i = tmp_list(tmp_i) - R(i,j) = tmp_R(tmp_i,tmp_j) - enddo - enddo - -end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org deleted file mode 100644 index 16434dc8..00000000 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.org +++ /dev/null @@ -1,65 +0,0 @@ -* Rotation matrix in a subspace to rotation matrix in the full space - -Usually, we are using a list of MOs, for exemple the active ones. When -we compute a rotation matrix to rotate the MOs, we just compute a -rotation matrix for these MOs in order to reduce the size of the -matrix which has to be computed. Since the computation of a rotation -matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to -reuce the number of MOs involved. -After that we replace the rotation matrix in the full space by -building the elements of the rotation matrix in the full space from -the elements of the rotation matrix in the subspace and adding some 0 -on the extradiagonal elements and some 1 on the diagonal elements, -for the MOs that are not involved in the rotation. - -Provided: -| mo_num | integer | Number of MOs | - -Input: -| m | integer | Size of tmp_list, m <= mo_num | -| tmp_list(m) | integer | List of MOs | -| tmp_R(m,m) | double precision | Rotation matrix in the space of | -| | | the MOs containing by tmp_list | - -Output: -| R(mo_num,mo_num | double precision | Rotation matrix in the space | -| | | of all the MOs | - -Internal: -| i,j | integer | indexes in the full space | -| tmp_i,tmp_j | integer | indexes in the subspace | - -#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f -subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - - BEGIN_DOC - ! Compute the full rotation matrix from a smaller one - END_DOC - - implicit none - - ! in - integer, intent(in) :: m, tmp_list(m) - double precision, intent(in) :: tmp_R(m,m) - - ! out - double precision, intent(out) :: R(mo_num,mo_num) - - ! internal - integer :: i,j,tmp_i,tmp_j - - ! tmp_R to R, subspace to full space - R = 0d0 - do i = 1, mo_num - R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital - enddo - do tmp_j = 1, m - j = tmp_list(tmp_j) - do tmp_i = 1, m - i = tmp_list(tmp_i) - R(i,j) = tmp_R(tmp_i,tmp_j) - enddo - enddo - -end -#+END_SRC diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f deleted file mode 100644 index b7d849d1..00000000 --- a/src/utils_trust_region/trust_region_expected_e.irp.f +++ /dev/null @@ -1,119 +0,0 @@ -! Predicted energy : e_model - -! *Compute the energy predicted by the Taylor series* - -! The energy is predicted using a Taylor expansion truncated at te 2nd -! order : - -! \begin{align*} -! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) -! \end{align*} - -! Input: -! | n | integer | m*(m-1)/2 | -! | v_grad(n) | double precision | gradient | -! | H(n,n) | double precision | hessian | -! | x(n) | double precision | Step in the trust region | -! | prev_energy | double precision | previous energy | - -! Output: -! | e_model | double precision | predicted energy after the rotation of the MOs | - -! Internal: -! | part_1 | double precision | v_grad^T.x | -! | part_2 | double precision | 1/2 . x^T.H.x | -! | part_2a | double precision | H.x | -! | i,j | integer | indexes | - -! Function: -! | ddot | double precision | dot product (Lapack) | - - -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) - - include 'pi.h' - - BEGIN_DOC - ! Compute the expected criterion/energy after the application of the step x - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) - double precision, intent(in) :: prev_energy - - ! out - double precision, intent(out) :: e_model - - ! internal - double precision :: part_1, part_2, t1,t2,t3 - double precision, allocatable :: part_2a(:) - - integer :: i,j - - !Function - double precision :: ddot - - print*,'' - print*,'---Trust_e_model---' - - call wall_time(t1) - - ! Allocation - allocate(part_2a(n)) - -! Calculations - -! part_1 corresponds to the product g.x -! part_2a corresponds to the product H.x -! part_2 corresponds to the product 0.5*(x^T.H.x) - -! TODO: remove the dot products - - -! Product v_grad.x - part_1 = ddot(n,v_grad,1,x,1) - - !if (debug) then - print*,'g.x : ', part_1 - !endif - - ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) - - ! Product 1/2 . x^T.H.x - part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) - - !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 - !endif - - print*,'prev_energy', prev_energy - - ! Sum - e_model = prev_energy + part_1 + part_2 - - ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model - - ! Can be deleted, already in another subroutine - if (DABS(prev_energy - e_model) < 1d-12 ) then - print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' - endif - - ! Deallocation - deallocate(part_2a) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust e model:', t3 - - print*,'---End trust_e_model---' - print*,'' - -end subroutine diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org deleted file mode 100644 index 58c8f804..00000000 --- a/src/utils_trust_region/trust_region_expected_e.org +++ /dev/null @@ -1,121 +0,0 @@ -* Predicted energy : e_model - -*Compute the energy predicted by the Taylor series* - -The energy is predicted using a Taylor expansion truncated at te 2nd -order : - -\begin{align*} -E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) -\end{align*} - -Input: -| n | integer | m*(m-1)/2 | -| v_grad(n) | double precision | gradient | -| H(n,n) | double precision | hessian | -| x(n) | double precision | Step in the trust region | -| prev_energy | double precision | previous energy | - -Output: -| e_model | double precision | predicted energy after the rotation of the MOs | - -Internal: -| part_1 | double precision | v_grad^T.x | -| part_2 | double precision | 1/2 . x^T.H.x | -| part_2a | double precision | H.x | -| i,j | integer | indexes | - -Function: -| ddot | double precision | dot product (Lapack) | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) - - include 'pi.h' - - BEGIN_DOC - ! Compute the expected criterion/energy after the application of the step x - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) - double precision, intent(in) :: prev_energy - - ! out - double precision, intent(out) :: e_model - - ! internal - double precision :: part_1, part_2, t1,t2,t3 - double precision, allocatable :: part_2a(:) - - integer :: i,j - - !Function - double precision :: ddot - - print*,'' - print*,'---Trust_e_model---' - - call wall_time(t1) - - ! Allocation - allocate(part_2a(n)) -#+END_SRC - -** Calculations - -part_1 corresponds to the product g.x -part_2a corresponds to the product H.x -part_2 corresponds to the product 0.5*(x^T.H.x) - -TODO: remove the dot products - -#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f - ! Product v_grad.x - part_1 = ddot(n,v_grad,1,x,1) - - !if (debug) then - print*,'g.x : ', part_1 - !endif - - ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) - - ! Product 1/2 . x^T.H.x - part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) - - !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 - !endif - - print*,'prev_energy', prev_energy - - ! Sum - e_model = prev_energy + part_1 + part_2 - - ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model - - ! Can be deleted, already in another subroutine - if (DABS(prev_energy - e_model) < 1d-12 ) then - print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' - endif - - ! Deallocation - deallocate(part_2a) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust e model:', t3 - - print*,'---End trust_e_model---' - print*,'' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f deleted file mode 100644 index f71bb405..00000000 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ /dev/null @@ -1,1655 +0,0 @@ -! Newton's method to find the optimal lambda - -! *Compute the lambda value for the trust region* - -! This subroutine uses the Newton method in order to find the optimal -! lambda. This constant is added on the diagonal of the hessian to shift -! the eiganvalues. It has a double role: -! - ensure that the resulting hessian is positive definite for the -! Newton method -! - constrain the step in the trust region, i.e., -! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius -! of the trust region. -! We search $\lambda$ which minimizes -! \begin{align*} -! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 -! \end{align*} -! or -! \begin{align*} -! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 -! \end{align*} -! and gives obviously 0 in both cases. \newline - -! There are several cases: -! - If $\textbf{H}$ is positive definite the interval containing the -! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). -! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot -! \textbf{g} \neq 0$ then the interval containing -! the solution is $\lambda \in (-h_1, \infty)$. -! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ then the interval containing the solution is -! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < -! 10^{-12}$ are not computed, so the term where $i = 1$ is -! automatically removed and this case becomes similar to the previous one. - -! So to avoid numerical problems (cf. trust_region) we start the -! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, -! with $\epsilon$ a little constant. -! The research must be restricted to the interval containing the -! solution. For that reason a little trust region in 1D is used. - -! The Newton method to find the optimal $\lambda$ is : -! \begin{align*} -! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ -! \end{align*} -! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to -! $\lambda$ at the l-th iteration, -! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to -! $\lambda$ at the l-th iteration.\newline - -! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} -! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as -! \begin{align*} -! y \leq \alpha -! \end{align*} -! with $\alpha$ a scalar representing the trust length (trust region in -! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the -! Taylor series truncated at the second order. Thus, if $y > \alpha$, -! the constraint is applied as -! \begin{align*} -! y^* = \alpha \frac{y}{|y|} -! \end{align*} -! with $y^*$ the solution in the trust region. - -! The size of the trust region evolves in function of $\rho$ as for the -! trust region seen previously cf. trust_region, rho_model. -! The prediction of the value of $f$ or $\tilde{f}$ is done using the -! Taylor series truncated at the second order cf. "trust_region", -! "trust_e_model". - -! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - -! \Delta^2)^2$ with respect to $\lambda$ are: -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{align*} - -! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - -! 1/\Delta^2)^2$ with respect to $\lambda$ are: -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} -! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - -! Provided in qp_edit: -! | thresh_rho_2 | -! | thresh_cc | -! | nb_it_max_lambda | -! | version_lambda_search | -! | nb_it_max_pre_search | -! see qp_edit for more details - -! Input: -! | n | integer | m*(m-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | -! | delta | double precision | delta for the trust region | - -! Output: -! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | -! | | | lambda > 0 | - -! Internal: -! | d1_N | double precision | value of d1_norm_trust_region | -! | d2_N | double precision | value of d2_norm_trust_region | -! | f_N | double precision | value of f_norm_trust_region | -! | prev_f_N | double precision | previous value of f_norm_trust_region | -! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | -! | prev_f_R | double precision | previous value of f_R | -! | model | double precision | predicted value of f_R from prev_f_R and y | -! | d_1 | double precision | value of the first derivative | -! | d_2 | double precision | value of the second derivative | -! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | -! | prev_lambda | double precision | previous value of lambda | -! | t1,t2,t3 | double precision | wall time | -! | i | integer | index | -! | epsilon | double precision | little constant to avoid numerical problem | -! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | -! | version | integer | version of the root finding method | - -! Function: -! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -! | f_norm_trust_region | double precision | value of norm(x)^2 | - - - -subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Research the optimal lambda to constrain the step size in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(inout) :: e_val(n) - double precision, intent(in) :: delta - double precision, intent(in) :: tmp_wtg(n) - - ! out - double precision, intent(out) :: lambda - - ! Internal - double precision :: d1_N, d2_N, f_N, prev_f_N - double precision :: prev_f_R, f_R - double precision :: model - double precision :: d_1, d_2 - double precision :: t1,t2,t3 - integer :: i - double precision :: epsilon - double precision :: y - double precision :: prev_lambda - double precision :: rho_2 - double precision :: alpha - integer :: version - - ! Functions - double precision :: d1_norm_trust_region,d1_norm_trust_region_omp - double precision :: d2_norm_trust_region, d2_norm_trust_region_omp - double precision :: f_norm_trust_region, f_norm_trust_region_omp - double precision :: d1_norm_inverse_trust_region - double precision :: d2_norm_inverse_trust_region - double precision :: d1_norm_inverse_trust_region_omp - double precision :: d2_norm_inverse_trust_region_omp - - print*,'' - print*,'---Trust_newton---' - print*,'' - - call wall_time(t1) - - ! version_lambda_search - ! 1 -> ||x||^2 - delta^2 = 0, - ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif - ! Version 2 is normally better - - - -! Resolution with the Newton method: - - -! Initialization - epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) - - ! Pre research of lambda to start near the optimal lambda - ! by adding a constant epsilon and changing the constant to - ! have ||x(lambda + epsilon)|| ~ delta, before setting - ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta - i = 1 - - ! To increase lambda - if (f_N > delta**2) then - print*,'Increasing lambda...' - do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 2d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N < f_N) then - print*,'WARNING, error: prev_f_N < f_N, exit' - epsilon = epsilon * 0.5d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - - ! To reduce lambda - else - print*,'Reducing lambda...' - do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 0.5d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N > f_N) then - print*,'WARNING, error: prev_f_N > f_N, exit' - epsilon = epsilon * 2d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - endif - - print*,'End of the pre research of lambda' - - ! New value of lambda - lambda = lambda + epsilon - - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda - - ! thresh_cc, threshold for the research of the optimal lambda - ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc - ! thresh_rho_2, threshold to cancel the step in the research - ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) - - ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif - - ! Initialization - i = 1 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 - model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 - prev_f_N = 0d0 ! previous value of ||x||^2 - prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 - f_R = 0d0 ! value of (||x||^2 - delta^2)^2 - rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) - y = 0d0 ! step size - prev_lambda = 0d0 ! previous lambda - - ! Derivatives - if (version_lambda_search == 1) then - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - - ! Trust length - alpha = DABS((1d0/d_2)*d_1) - - ! Newton's method - do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' - - ! Update of f_N, f_R and the derivatives - prev_f_N = f_N - if (version_lambda_search == 1) then - prev_f_R = (prev_f_N - delta**2)**2 - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 - - ! Newton's step - y = -(1d0/DABS(d_2))*d_1 - - ! Constraint on y (the newton step) - if (DABS(y) > alpha) then - y = alpha * (y/DABS(y)) ! preservation of the sign of y - endif - write(*,'(a,E12.5)') ' Step length: ', y - - ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series - model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 - - ! Updates lambda - prev_lambda = lambda - lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda - - ! Checks if lambda is in (-h_1, \infty) - if (lambda > MAX(0d0, -e_val(1))) then - ! New value of ||x(lambda)||^2 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - ! New f_R - if (version_lambda_search == 1) then - f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 - else - f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 - endif - - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif - - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model - - ! Check the gain - if (DABS(prev_f_R - model) < thresh_model_2) then - print*,'' - print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' - print*,'' - endif - - ! Will be deleted - !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then - ! print*,'' - ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' - ! print*,'' - ! exit - !endif - - ! Computes rho_2 - rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 - else - rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' - endif - - ! Evolution of the trust length, alpha - if (rho_2 >= 0.75d0) then - alpha = 2d0 * alpha - elseif (rho_2 >= 0.5d0) then - alpha = alpha - elseif (rho_2 >= 0.25d0) then - alpha = 0.5d0 * alpha - else - alpha = 0.25d0 * alpha - endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha - - ! cancellaion of the step if rho < 0.1 - if (rho_2 < thresh_rho_2) then !0.1d0) then - lambda = prev_lambda - f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' - endif - - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' - - i = i + 1 - enddo - - ! if trust newton failed - if (i > nb_it_max_lambda) then - print*,'' - print*,'######################################################' - print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' - print*,'The research of the optimal lambda has failed' - print*,'######################################################' - print*,'' - endif - - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 - - ! Time - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust_newton:', t3 - - print*,'' - print*,'---End trust_newton---' - print*,'' - -end subroutine - -! OMP: First derivative of (||x||^2 - Delta^2)^2 - -! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* - -! This function computes the first derivative of (||x||^2 - Delta^2)^2 -! with respect to lambda. - -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} - -! Provided: -! | mo_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - - -function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg,accu1,accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) - - deallocate(tmp_accu1, tmp_accu2) - -end function - -! OMP: Second derivative of (||x||^2 - Delta^2)^2 - -! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* - -! This function computes the second derivative of (||x||^2 - Delta^2)^2 -! with respect to lambda. -! \begin{align*} -! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | accu3 | double precision | third sum of the formula | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_accu2 | double precision | temporary array for the third sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - - -function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region_omp - double precision :: ddot - - ! Internal - double precision :: accu1,accu2,accu3 - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - integer :: i, j - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - - ! accu1 - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - ! accu2 - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - ! accu3 - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) - - deallocate(tmp_accu1, tmp_accu2, tmp_accu3) - -end function - -! OMP: Function value of ||x||^2 - -! *Compute the value of ||x||^2* - -! This function computes the value of ||x(lambda)||^2 - -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | - -! Internal: -! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | -! | tmp_fN | double precision | temporary array for the function | -! | i,j | integer | indexes | - - -function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - use omp_lib - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! functions - double precision :: f_norm_trust_region_omp - - ! internal - double precision, allocatable :: tmp_fN(:) - integer :: i,j - - ! Allocation - allocate(tmp_fN(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - f_norm_trust_region_omp = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_fN(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - deallocate(tmp_fN) - -end function - -! First derivative of (||x||^2 - Delta^2)^2 -! Version without omp - -! *Function to compute the first derivative of ||x||^2 - Delta* - -! This function computes the first derivative of (||x||^2 - Delta^2)^2 -! with respect to lambda. - -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -! | ddot | double precision | blas dot product | - - -function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i, j - - ! Functions - double precision :: d1_norm_trust_region - double precision :: ddot - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) - -end function - -! Second derivative of (||x||^2 - Delta^2)^2 -! Version without OMP - -! *Function to compute the second derivative of ||x||^2 - Delta* - - -! \begin{equation} -! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{equation} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | accu3 | double precision | third sum of the formula | -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | -! | ddot | double precision | blas dot product | - - -function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region - double precision :: ddot - - ! Internal - double precision :: wtg,accu1,accu2,accu3 - integer :: i, j - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 - endif - enddo - - d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) - -end function - -! Function value of ||x||^2 -! Version without OMP - -! *Compute the value of ||x||^2* - -! This function computes the value of ||x(lambda)||^2 - -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | f_norm_trust_region | double precision | value of norm(x)^2 | -! | ddot | double precision | blas dot product | - - - -function f_norm_trust_region(n,e_val,tmp_wtg,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! function - double precision :: f_norm_trust_region - double precision :: ddot - - ! internal - integer :: i,j - - ! Initialization - f_norm_trust_region = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - -end function - -! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version with OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - - deallocate(tmp_accu1, tmp_accu2) - -end - -! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version with OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2, accu3 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - - ! Functions - double precision :: d2_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - - deallocate(tmp_accu1,tmp_accu2,tmp_accu3) - -end - -! First derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version without OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i,j - - ! Functions - double precision :: d1_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - -end - -! Second derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version without OMP - -! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2, accu3 - integer :: i,j - - ! Functions - double precision :: d2_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 - endif - enddo - - d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - -end diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org deleted file mode 100644 index b39c9a10..00000000 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ /dev/null @@ -1,1665 +0,0 @@ -* Newton's method to find the optimal lambda - -*Compute the lambda value for the trust region* - -This subroutine uses the Newton method in order to find the optimal -lambda. This constant is added on the diagonal of the hessian to shift -the eiganvalues. It has a double role: -- ensure that the resulting hessian is positive definite for the - Newton method -- constrain the step in the trust region, i.e., - $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius - of the trust region. -We search $\lambda$ which minimizes -\begin{align*} - f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 -\end{align*} -or -\begin{align*} - \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 -\end{align*} -and gives obviously 0 in both cases. \newline - -There are several cases: -- If $\textbf{H}$ is positive definite the interval containing the - solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). -- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot - \textbf{g} \neq 0$ then the interval containing - the solution is $\lambda \in (-h_1, \infty)$. -- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot - \textbf{g} = 0$ then the interval containing the solution is - $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < - 10^{-12}$ are not computed, so the term where $i = 1$ is - automatically removed and this case becomes similar to the previous one. - -So to avoid numerical problems (cf. trust_region) we start the -algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, -with $\epsilon$ a little constant. -The research must be restricted to the interval containing the -solution. For that reason a little trust region in 1D is used. - -The Newton method to find the optimal $\lambda$ is : -\begin{align*} - \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ -\end{align*} -$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to -$\lambda$ at the l-th iteration, -$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to -$\lambda$ at the l-th iteration.\newline - -Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} -f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as -\begin{align*} - y \leq \alpha -\end{align*} -with $\alpha$ a scalar representing the trust length (trust region in -1D) where the function $f$ or $\tilde{f}$ is correctly describe by the -Taylor series truncated at the second order. Thus, if $y > \alpha$, -the constraint is applied as -\begin{align*} - y^* = \alpha \frac{y}{|y|} -\end{align*} -with $y^*$ the solution in the trust region. - -The size of the trust region evolves in function of $\rho$ as for the -trust region seen previously cf. trust_region, rho_model. -The prediction of the value of $f$ or $\tilde{f}$ is done using the -Taylor series truncated at the second order cf. "trust_region", -"trust_e_model". - -The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - -\Delta^2)^2$ with respect to $\lambda$ are: -\begin{align*} - \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 - = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) - \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} -\begin{align*} -\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{align*} - -The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - -1/\Delta^2)^2$ with respect to $\lambda$ are: -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} - {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - -Provided in qp_edit: -| thresh_rho_2 | -| thresh_cc | -| nb_it_max_lambda | -| version_lambda_search | -| nb_it_max_pre_search | -see qp_edit for more details - -Input: -| n | integer | m*(m-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | -| delta | double precision | delta for the trust region | - -Output: -| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | -| | | lambda > 0 | - -Internal: -| d1_N | double precision | value of d1_norm_trust_region | -| d2_N | double precision | value of d2_norm_trust_region | -| f_N | double precision | value of f_norm_trust_region | -| prev_f_N | double precision | previous value of f_norm_trust_region | -| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | -| prev_f_R | double precision | previous value of f_R | -| model | double precision | predicted value of f_R from prev_f_R and y | -| d_1 | double precision | value of the first derivative | -| d_2 | double precision | value of the second derivative | -| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | -| prev_lambda | double precision | previous value of lambda | -| t1,t2,t3 | double precision | wall time | -| i | integer | index | -| epsilon | double precision | little constant to avoid numerical problem | -| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | -| version | integer | version of the root finding method | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -| f_norm_trust_region | double precision | value of norm(x)^2 | - - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Research the optimal lambda to constrain the step size in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(inout) :: e_val(n) - double precision, intent(in) :: delta - double precision, intent(in) :: tmp_wtg(n) - - ! out - double precision, intent(out) :: lambda - - ! Internal - double precision :: d1_N, d2_N, f_N, prev_f_N - double precision :: prev_f_R, f_R - double precision :: model - double precision :: d_1, d_2 - double precision :: t1,t2,t3 - integer :: i - double precision :: epsilon - double precision :: y - double precision :: prev_lambda - double precision :: rho_2 - double precision :: alpha - integer :: version - - ! Functions - double precision :: d1_norm_trust_region,d1_norm_trust_region_omp - double precision :: d2_norm_trust_region, d2_norm_trust_region_omp - double precision :: f_norm_trust_region, f_norm_trust_region_omp - double precision :: d1_norm_inverse_trust_region - double precision :: d2_norm_inverse_trust_region - double precision :: d1_norm_inverse_trust_region_omp - double precision :: d2_norm_inverse_trust_region_omp - - print*,'' - print*,'---Trust_newton---' - print*,'' - - call wall_time(t1) - - ! version_lambda_search - ! 1 -> ||x||^2 - delta^2 = 0, - ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif - ! Version 2 is normally better -#+END_SRC - -Resolution with the Newton method: - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f - ! Initialization - epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) - - ! Pre research of lambda to start near the optimal lambda - ! by adding a constant epsilon and changing the constant to - ! have ||x(lambda + epsilon)|| ~ delta, before setting - ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta - i = 1 - - ! To increase lambda - if (f_N > delta**2) then - print*,'Increasing lambda...' - do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 2d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N < f_N) then - print*,'WARNING, error: prev_f_N < f_N, exit' - epsilon = epsilon * 0.5d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - - ! To reduce lambda - else - print*,'Reducing lambda...' - do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 0.5d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N > f_N) then - print*,'WARNING, error: prev_f_N > f_N, exit' - epsilon = epsilon * 2d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - endif - - print*,'End of the pre research of lambda' - - ! New value of lambda - lambda = lambda + epsilon - - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda - - ! thresh_cc, threshold for the research of the optimal lambda - ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc - ! thresh_rho_2, threshold to cancel the step in the research - ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) - - ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif - - ! Initialization - i = 1 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 - model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 - prev_f_N = 0d0 ! previous value of ||x||^2 - prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 - f_R = 0d0 ! value of (||x||^2 - delta^2)^2 - rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) - y = 0d0 ! step size - prev_lambda = 0d0 ! previous lambda - - ! Derivatives - if (version_lambda_search == 1) then - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - - ! Trust length - alpha = DABS((1d0/d_2)*d_1) - - ! Newton's method - do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' - - ! Update of f_N, f_R and the derivatives - prev_f_N = f_N - if (version_lambda_search == 1) then - prev_f_R = (prev_f_N - delta**2)**2 - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 - - ! Newton's step - y = -(1d0/DABS(d_2))*d_1 - - ! Constraint on y (the newton step) - if (DABS(y) > alpha) then - y = alpha * (y/DABS(y)) ! preservation of the sign of y - endif - write(*,'(a,E12.5)') ' Step length: ', y - - ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series - model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 - - ! Updates lambda - prev_lambda = lambda - lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda - - ! Checks if lambda is in (-h_1, \infty) - if (lambda > MAX(0d0, -e_val(1))) then - ! New value of ||x(lambda)||^2 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - ! New f_R - if (version_lambda_search == 1) then - f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 - else - f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 - endif - - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif - - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model - - ! Check the gain - if (DABS(prev_f_R - model) < thresh_model_2) then - print*,'' - print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' - print*,'' - endif - - ! Will be deleted - !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then - ! print*,'' - ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' - ! print*,'' - ! exit - !endif - - ! Computes rho_2 - rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 - else - rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' - endif - - ! Evolution of the trust length, alpha - if (rho_2 >= 0.75d0) then - alpha = 2d0 * alpha - elseif (rho_2 >= 0.5d0) then - alpha = alpha - elseif (rho_2 >= 0.25d0) then - alpha = 0.5d0 * alpha - else - alpha = 0.25d0 * alpha - endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha - - ! cancellaion of the step if rho < 0.1 - if (rho_2 < thresh_rho_2) then !0.1d0) then - lambda = prev_lambda - f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' - endif - - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' - - i = i + 1 - enddo - - ! if trust newton failed - if (i > nb_it_max_lambda) then - print*,'' - print*,'######################################################' - print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' - print*,'The research of the optimal lambda has failed' - print*,'######################################################' - print*,'' - endif - - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 - - ! Time - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust_newton:', t3 - - print*,'' - print*,'---End trust_newton---' - print*,'' - -end subroutine -#+END_SRC - -* OMP: First derivative of (||x||^2 - Delta^2)^2 - -*Function to compute the first derivative of (||x||^2 - Delta^2)^2* - -This function computes the first derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. - -\begin{align*} -\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) -\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} - -\begin{align*} - \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ - \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| mo_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg,accu1,accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) - - deallocate(tmp_accu1, tmp_accu2) - -end function -#+END_SRC - -* OMP: Second derivative of (||x||^2 - Delta^2)^2 - -*Function to compute the second derivative of (||x||^2 - Delta^2)^2* - -This function computes the second derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. -\begin{align*} -\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{align*} - -\begin{align*} - \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ - \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ - \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| accu3 | double precision | third sum of the formula | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_accu2 | double precision | temporary array for the third sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region_omp - double precision :: ddot - - ! Internal - double precision :: accu1,accu2,accu3 - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - integer :: i, j - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - - ! accu1 - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - ! accu2 - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - ! accu3 - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) - - deallocate(tmp_accu1, tmp_accu2, tmp_accu3) - -end function -#+END_SRC - -* OMP: Function value of ||x||^2 - -*Compute the value of ||x||^2* - -This function computes the value of ||x(lambda)||^2 - -\begin{align*} -||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | - -Internal: -| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | -| tmp_fN | double precision | temporary array for the function | -| i,j | integer | indexes | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - use omp_lib - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! functions - double precision :: f_norm_trust_region_omp - - ! internal - double precision, allocatable :: tmp_fN(:) - integer :: i,j - - ! Allocation - allocate(tmp_fN(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - f_norm_trust_region_omp = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_fN(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - deallocate(tmp_fN) - -end function -#+END_SRC - -* First derivative of (||x||^2 - Delta^2)^2 -Version without omp - -*Function to compute the first derivative of ||x||^2 - Delta* - -This function computes the first derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. - -\begin{align*} -\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| ddot | double precision | blas dot product | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i, j - - ! Functions - double precision :: d1_norm_trust_region - double precision :: ddot - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) - -end function -#+END_SRC - -* Second derivative of (||x||^2 - Delta^2)^2 -Version without OMP - -*Function to compute the second derivative of ||x||^2 - Delta* - - -\begin{equation} -\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{equation} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| accu3 | double precision | third sum of the formula | -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | -| ddot | double precision | blas dot product | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region - double precision :: ddot - - ! Internal - double precision :: wtg,accu1,accu2,accu3 - integer :: i, j - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 - endif - enddo - - d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) - -end function -#+END_SRC - -* Function value of ||x||^2 -Version without OMP - -*Compute the value of ||x||^2* - -This function computes the value of ||x(lambda)||^2 - -\begin{align*} -||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| f_norm_trust_region | double precision | value of norm(x)^2 | -| ddot | double precision | blas dot product | - - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function f_norm_trust_region(n,e_val,tmp_wtg,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! function - double precision :: f_norm_trust_region - double precision :: ddot - - ! internal - integer :: i,j - - ! Initialization - f_norm_trust_region = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - -end function -#+END_SRC - -* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 -Version with OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - - deallocate(tmp_accu1, tmp_accu2) - -end -#+END_SRC - -* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 -Version with OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2, accu3 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - - ! Functions - double precision :: d2_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - - deallocate(tmp_accu1,tmp_accu2,tmp_accu3) - -end -#+END_SRC - -* First derivative of (1/||x||^2 - 1/Delta^2)^2 -Version without OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i,j - - ! Functions - double precision :: d1_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - -end -#+END_SRC - -* Second derivative of (1/||x||^2 - 1/Delta^2)^2 -Version without OMP - -*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2, accu3 - integer :: i,j - - ! Functions - double precision :: d2_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 - endif - enddo - - d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - -end -#+END_SRC diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f deleted file mode 100644 index 45738736..00000000 --- a/src/utils_trust_region/trust_region_rho.irp.f +++ /dev/null @@ -1,121 +0,0 @@ -! Agreement with the model: Rho - -! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* - -! Rho represents the agreement between the model (the predicted energy -! by the Taylor expansion truncated at the 2nd order) and the real -! energy : - -! \begin{equation} -! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -! \end{equation} -! With : -! $E^{k}$ the energy at the previous iteration -! $E^{k+1}$ the energy at the actual iteration -! $m^{k+1}$ the predicted energy for the actual iteration -! (cf. trust_e_model) - -! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. -! If $\rho \leq 0$ the previous energy is lower than the actual -! energy. We have to cancel the last step and use a smaller trust -! region. -! Here we cancel the last step if $\rho < 0.1$, because even if -! the energy decreases, the agreement is bad, i.e., the Taylor expansion -! truncated at the second order doesn't represent correctly the energy -! landscape. So it's better to cancel the step and restart with a -! smaller trust region. - -! Provided in qp_edit: -! | thresh_rho | - -! Input: -! | prev_energy | double precision | previous energy (energy before the rotation) | -! | e_model | double precision | predicted energy after the rotation | - -! Output: -! | rho | double precision | the agreement between the model (predicted) and the real energy | -! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | -! | | | else the previous energy doesn't change | - -! Internal: -! | energy | double precision | energy (real) after the rotation | -! | i | integer | index | -! | t* | double precision | time | - - -subroutine trust_region_rho(prev_energy, energy,e_model,rho) - - include 'pi.h' - - BEGIN_DOC - ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC - - implicit none - - ! Variables - - ! In - double precision, intent(inout) :: prev_energy - double precision, intent(in) :: e_model, energy - - ! Out - double precision, intent(out) :: rho - - ! Internal - double precision :: t1, t2, t3 - integer :: i - - print*,'' - print*,'---Rho_model---' - - call wall_time(t1) - -! Rho -! \begin{equation} -! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -! \end{equation} - -! In function of $\rho$ th step can be accepted or cancelled. - -! If we cancel the last step (k+1), the previous energy (k) doesn't -! change! -! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) - - -! Already done in an other subroutine - !if (ABS(prev_energy - e_model) < 1d-12) then - ! print*,'WARNING: prev_energy - e_model < 1d-12' - ! print*,'=> rho will tend toward infinity' - ! print*,'Check you convergence criterion !' - !endif - - rho = (prev_energy - energy) / (prev_energy - e_model) - - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho - - ! Modification of prev_energy in function of rho - if (rho < thresh_rho) then !0.1) then - ! the step is cancelled - print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy - else - ! the step is accepted - prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy - endif - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 - - print*,'---End rho_model---' - print*,'' - -end subroutine diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org deleted file mode 100644 index 9b25ee29..00000000 --- a/src/utils_trust_region/trust_region_rho.org +++ /dev/null @@ -1,123 +0,0 @@ -* Agreement with the model: Rho - -*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* - -Rho represents the agreement between the model (the predicted energy -by the Taylor expansion truncated at the 2nd order) and the real -energy : - -\begin{equation} -\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -\end{equation} -With : -$E^{k}$ the energy at the previous iteration -$E^{k+1}$ the energy at the actual iteration -$m^{k+1}$ the predicted energy for the actual iteration -(cf. trust_e_model) - -If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. -If $\rho \leq 0$ the previous energy is lower than the actual -energy. We have to cancel the last step and use a smaller trust -region. -Here we cancel the last step if $\rho < 0.1$, because even if -the energy decreases, the agreement is bad, i.e., the Taylor expansion -truncated at the second order doesn't represent correctly the energy -landscape. So it's better to cancel the step and restart with a -smaller trust region. - -Provided in qp_edit: -| thresh_rho | - -Input: -| prev_energy | double precision | previous energy (energy before the rotation) | -| e_model | double precision | predicted energy after the rotation | - -Output: -| rho | double precision | the agreement between the model (predicted) and the real energy | -| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | -| | | else the previous energy doesn't change | - -Internal: -| energy | double precision | energy (real) after the rotation | -| i | integer | index | -| t* | double precision | time | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f -subroutine trust_region_rho(prev_energy, energy,e_model,rho) - - include 'pi.h' - - BEGIN_DOC - ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC - - implicit none - - ! Variables - - ! In - double precision, intent(inout) :: prev_energy - double precision, intent(in) :: e_model, energy - - ! Out - double precision, intent(out) :: rho - - ! Internal - double precision :: t1, t2, t3 - integer :: i - - print*,'' - print*,'---Rho_model---' - - call wall_time(t1) -#+END_SRC - -** Rho -\begin{equation} -\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -\end{equation} - -In function of $\rho$ th step can be accepted or cancelled. - -If we cancel the last step (k+1), the previous energy (k) doesn't -change! -If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) - -#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f - ! Already done in an other subroutine - !if (ABS(prev_energy - e_model) < 1d-12) then - ! print*,'WARNING: prev_energy - e_model < 1d-12' - ! print*,'=> rho will tend toward infinity' - ! print*,'Check you convergence criterion !' - !endif - - rho = (prev_energy - energy) / (prev_energy - e_model) - - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho - - ! Modification of prev_energy in function of rho - if (rho < thresh_rho) then !0.1) then - ! the step is cancelled - print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy - else - ! the step is accepted - prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy - endif - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 - - print*,'---End rho_model---' - print*,'' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f deleted file mode 100644 index 42aa6ed4..00000000 --- a/src/utils_trust_region/trust_region_step.irp.f +++ /dev/null @@ -1,716 +0,0 @@ -! Trust region - -! *Compute the next step with the trust region algorithm* - -! The Newton method is an iterative method to find a minimum of a given -! function. It uses a Taylor series truncated at the second order of the -! targeted function and gives its minimizer. The minimizer is taken as -! the new position and the same thing is done. And by doing so -! iteratively the method find a minimum, a local or global one depending -! of the starting point and the convexity/nonconvexity of the targeted -! function. - -! The goal of the trust region is to constrain the step size of the -! Newton method in a certain area around the actual position, where the -! Taylor series is a good approximation of the targeted function. This -! area is called the "trust region". - -! In addition, in function of the agreement between the Taylor -! development of the energy and the real energy, the size of the trust -! region will be updated at each iteration. By doing so, the step sizes -! are not too larges. In addition, since we add a criterion to cancel the -! step if the energy increases (more precisely if rho < 0.1), so it's -! impossible to diverge. \newline - -! References: \newline -! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline -! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline -! ISBN: 978-0-387-40065-5 \newline - -! By using the first and the second derivatives, the Newton method gives -! a step: -! \begin{align*} -! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot -! \textbf{g}_{(k)} -! \end{align*} -! which leads to the minimizer of the Taylor series. -! !!! Warning: the Newton method gives the minimizer if and only if -! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! -! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: -! \begin{align*} -! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} -! \end{align*} -! which is equivalent to -! \begin{align*} -! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 -! \end{align*} - -! with: \newline -! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of -! size n) \newline -! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n -! matrix) \newline -! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of -! size n) \newline -! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration -! \newline - -! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a -! hypersphere of radius $\Delta_{(k+1)}$.\newline - -! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and -! $\textbf{H}$ is positive definite, the -! solution is the step given by the Newton method -! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. -! Else we have to constrain the step size. For simplicity we will remove -! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have -! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. -! Starting from the Taylor series of a function E (here, the energy) -! truncated at the 2nd order, we have: -! \begin{align*} -! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} -! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + -! \mathcal{O}(\textbf{x}^2) -! \end{align*} - -! With the constraint on the norm of $\textbf{x}$ we can write the -! Lagrangian -! \begin{align*} -! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} -! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} -! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) -! \end{align*} -! Where: \newline -! $\lambda$ is the Lagrange multiplier \newline -! $E$ is the energy at the k-th iteration $\Leftrightarrow -! E(\textbf{x} = \textbf{0})$ \newline - -! To solve this equation, we search a stationary point where the first -! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 -! \end{align*} - -! The derivative is: -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} -! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} -! \end{align*} - -! So, we search $\textbf{x}$ such as: -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} -! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 -! \end{align*} - -! We can rewrite that as: -! \begin{align*} -! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} -! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 -! \end{align*} -! with $\textbf{I}$ is the identity matrix. - -! By doing so, the solution is: -! \begin{align*} -! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} -! \end{align*} -! \begin{align*} -! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} -! \end{align*} -! with $\textbf{x}^T \textbf{x} = \Delta^2$. - -! We have to solve this previous equation to find this $\textbf{x}$ in the -! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is -! just a one dimension problem because we can express $\textbf{x}$ as a -! function of $\lambda$: -! \begin{align*} -! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} -! \end{align*} - -! We start from the fact that the hessian is diagonalizable. So we have: -! \begin{align*} -! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T -! \end{align*} -! with: \newline -! $\textbf{H}$, the hessian matrix \newline -! $\textbf{W}$, the matrix containing the eigenvectors \newline -! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline -! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline -! $h_i$, the i-th eigenvalue in ascending order \newline - -! Now we use the fact that adding a constant on the diagonal just shifts -! the eigenvalues: -! \begin{align*} -! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} -! +\textbf{I} \lambda) \cdot \textbf{W}^T -! \end{align*} - -! By doing so we can express $\textbf{x}$ as a function of $\lambda$ -! \begin{align*} -! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} -! with $\lambda \neq - h_i$. - -! An interesting thing in our case is the norm of $\textbf{x}$, -! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of -! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot -! \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. -! And if we study the properties of this function we see that: -! \begin{align*} -! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 -! \end{align*} -! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: -! \begin{align*} -! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty -! \end{align*} - -! From these limits and knowing that $h_1$ is the lowest eigenvalue, we -! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and -! strictly decreasing function on the interval $\lambda \in -! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which -! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one -! solution. - -! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot -! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, -! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the -! Newton method is only defined for a positive definite hessian matrix, -! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive -! definite. Consequently, in the case where $\textbf{H}$ is not positive -! definite, to ensure the positive definiteness, $\lambda$ must be -! greater than $- h_1$. -! \begin{align*} -! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 -! \end{align*} - -! From that there are five cases: -! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ -! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot -! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} -! \lambda)$ -! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ -! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing -! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be -! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) -! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing -! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be -! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is -! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = -! 0)|| \leq \Delta$ -! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ -! time a constant to ensure the condition $||\textbf{x}(\lambda = -! -h_1)|| = \Delta$ and escape from the saddle point - -! Thus to find the solution, we can write: -! \begin{align*} -! ||\textbf{x}(\lambda)|| = \Delta -! \end{align*} -! \begin{align*} -! ||\textbf{x}(\lambda)|| - \Delta = 0 -! \end{align*} - -! Taking the square of this equation -! \begin{align*} -! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 -! \end{align*} -! we have a function with one minimum for the optimal $\lambda$. -! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve -! \begin{align*} -! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -! \end{align*} - -! But in practice, it is more effective to solve: -! \begin{align*} -! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -! \end{align*} - -! To do that, we just use the Newton method with "trust_newton" using -! first and second derivative of $(||\textbf{x}(\lambda)||^2 - -! \Delta^2)^2$ with respect to $\textbf{x}$. -! This will give the optimal $\lambda$ to compute the -! solution $\textbf{x}$ with the formula seen previously: -! \begin{align*} -! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} - -! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our -! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. - - - - -! Evolution of the trust region - -! We initialize the trust region at the first iteration using a radius -! \begin{align*} -! \Delta = ||\textbf{x}(\lambda=0)|| -! \end{align*} - -! And for the next iteration the trust region will evolves depending of -! the agreement of the energy prediction based on the Taylor series -! truncated at the 2nd order and the real energy. If the Taylor series -! truncated at the 2nd order represents correctly the energy landscape -! the trust region will be extent else it will be reduced. In order to -! mesure this agreement we use the ratio rho cf. "rho_model" and -! "trust_e_model". From that we use the following values: -! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, -! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, -! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, -! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. - -! In addition, if $\rho < 0.1$ the iteration is cancelled, so it -! restarts with a smaller trust region until the energy decreases. - - - - -! Summary - -! To summarize, knowing the hessian (eigenvectors and eigenvalues), the -! gradient and the radius of the trust region we can compute the norm of -! the Newton step -! \begin{align*} -! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n -! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 -! \end{align*} - -! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and -! $\textbf{x}(\lambda=0)$ is in the trust region and it is not -! necessary to put a constraint on $\textbf{x}$, the solution is the -! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. -! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and -! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in -! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = -! -h_1)$, similarly to the previous case. -! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ -! time a constant to ensure the condition $||\textbf{x}(\lambda = -! -h_1)|| = \Delta$ and escape from the saddle point -! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we -! have to search $\lambda \in (-h_1, \infty)$ such as -! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method -! \begin{align*} -! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -! \end{align*} -! or -! \begin{align*} -! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -! \end{align*} -! which is numerically more stable. And finally compute -! \begin{align*} -! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} -! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we -! do exactly the same thing that the previous case but we search -! $\lambda \in (0, \infty)$ -! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and -! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the -! sum), again we do exactly the same thing that the previous case -! searching $\lambda \in (-h_1, \infty)$. - - -! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not -! necessary in fact to remove the $j = 1$ in the sum since the term -! where $h_i - \lambda < 10^{-6}$ are not computed. - -! After that, we take this vector $\textbf{x}^*$, called "x", and we do -! the transformation to an antisymmetric matrix $\textbf{X}$, called -! m_x. This matrix $\textbf{X}$ will be used to compute a rotation -! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". - -! NB: -! An improvement can be done using a elleptical trust region. - - - - -! Code - -! Provided: -! | mo_num | integer | number of MOs | - -! Cf. qp_edit in orbital optimization section, for some constants/thresholds - -! Input: -! | m | integer | number of MOs | -! | n | integer | m*(m-1)/2 | -! | H(n, n) | double precision | hessian | -! | v_grad(n) | double precision | gradient | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n, n) | double precision | eigenvectors of the hessian | -! | rho | double precision | agreement between the model and the reality, | -! | | | represents the quality of the energy prediction | -! | nb_iter | integer | number of iteration | - -! Input/Ouput: -! | delta | double precision | radius of the trust region | - -! Output: -! | x(n) | double precision | vector containing the step | - -! Internal: -! | accu | double precision | temporary variable to compute the step | -! | lambda | double precision | lagrange multiplier | -! | trust_radius2 | double precision | square of the radius of the trust region | -! | norm2_x | double precision | norm^2 of the vector x | -! | norm2_g | double precision | norm^2 of the vector containing the gradient | -! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | -! | i, j, k | integer | indexes | - -! Function: -! | dnrm2 | double precision | Blas function computing the norm | -! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | - - -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compuet the step in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n), rho - integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) - - ! inout - double precision, intent(inout) :: delta - - ! out - double precision, intent(out) :: x(n) - - ! Internal - double precision :: accu, lambda, trust_radius2 - double precision :: norm2_x, norm2_g - double precision, allocatable :: tmp_wtg(:) - integer :: i,j,k - double precision :: t1,t2,t3 - integer :: n_neg_eval - - - ! Functions - double precision :: ddot, dnrm2 - double precision :: f_norm_trust_region_omp - - print*,'' - print*,'==================' - print*,'---Trust_region---' - print*,'==================' - - call wall_time(t1) - - ! Allocation - allocate(tmp_wtg(n)) - -! Initialization and norm - -! The norm of the step size will be useful for the trust region -! algorithm. We start from a first guess and the radius of the trust -! region will evolve during the optimization. - -! avoid_saddle is actually a test to avoid saddle points - - -! Initialization of the Lagrange multiplier -lambda = 0d0 - -! List of w^T.g, to avoid the recomputation -tmp_wtg = 0d0 -do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) - enddo -enddo - -! Replacement of the small tmp_wtg corresponding to a negative eigenvalue -! in the case of avoid_saddle -if (avoid_saddle .and. e_val(1) < - thresh_eig) then - i = 2 - ! Number of negative eigenvalues - do while (e_val(i) < - thresh_eig) - if (tmp_wtg(i) < thresh_wtg2) then - if (version_avoid_saddle == 1) then - tmp_wtg(i) = 1d0 - elseif (version_avoid_saddle == 2) then - tmp_wtg(i) = DABS(e_val(i)) - elseif (version_avoid_saddle == 3) then - tmp_wtg(i) = dsqrt(DABS(e_val(i))) - else - tmp_wtg(i) = thresh_wtg2 - endif - endif - i = i + 1 - enddo - - ! For the fist one it's a little bit different - if (tmp_wtg(1) < thresh_wtg2) then - tmp_wtg(1) = 0d0 - endif - -endif - -! Norm^2 of x, ||x||^2 -norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) -! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta -! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm -! Anyway if the step is too big it will be reduced -print*,'||x||^2 :', norm2_x - -! Norm^2 of the gradient, ||v_grad||^2 -norm2_g = (dnrm2(n,v_grad,1))**2 -print*,'||grad||^2 :', norm2_g - -! Trust radius initialization - -! At the first iteration (nb_iter = 0) we initialize the trust region -! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = -! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, -! we compute this norm using f_norm_trust_region_omp as explain just -! below) - - -! trust radius -if (nb_iter == 0) then - trust_radius2 = norm2_x - ! To avoid infinite loop of cancellation of this first step - ! without changing delta - nb_iter = 1 - - ! Compute delta, delta = sqrt(trust_radius) - delta = dsqrt(trust_radius2) -endif - -! Modification of the trust radius - -! In function of rho (which represents the agreement between the model -! and the reality, cf. rho_model) the trust region evolves. We update -! delta (the radius of the trust region). - -! To avoid too big trust region we put a maximum size. - - -! Modification of the trust radius in function of rho -if (rho >= 0.75d0) then - delta = 2d0 * delta -elseif (rho >= 0.5d0) then - delta = delta -elseif (rho >= 0.25d0) then - delta = 0.5d0 * delta -else - delta = 0.25d0 * delta -endif - -! Maximum size of the trust region -!if (delta > 0.5d0 * n * pi) then -! delta = 0.5d0 * n * pi -! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' -!endif - -if (delta > 1d10) then - delta = 1d10 -endif - -print*, 'Delta :', delta - -! Calculation of the optimal lambda - -! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ -! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant -! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ -! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the -! unconstrained one, $\lambda = 0$ - -! You will find more details at the beginning - - -! By giving delta, we search (||x||^2 - delta^2)^2 = 0 -! and not (||x||^2 - delta)^2 = 0 - -! Research of lambda to solve ||x(lambda)|| = Delta - -! Display -print*, 'e_val(1) = ', e_val(1) -print*, 'w_1^T.g =', tmp_wtg(1) - -! H positive definite -if (e_val(1) > - thresh_eig) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta - - ! H positive definite, ||x(lambda = 0)|| <= Delta - if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' - lambda = 0d0 - - ! H positive definite, ||x(lambda = 0)|| > Delta - else - ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - -! H indefinite -else - if (DABS(tmp_wtg(1)) < thresh_wtg) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) - endif - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta - if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then - ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' - lambda = - e_val(1) - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta - ! and - ! H indefinite, w_1^T.g =/= 0 - else - ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - -endif - -! Recomputation of the norm^2 of the step x -norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) -print*,'' -print*,'Summary after the trust region:' -print*,'lambda:', lambda -print*,'||x||:', dsqrt(norm2_x) -print*,'delta:', delta - -! Calculation of the step x - -! x refers to $\textbf{x}^*$ -! We compute x in function of lambda using its formula : -! \begin{align*} -! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i -! + \lambda} \cdot \textbf{w}_i -! \end{align*} - - -! Initialisation -x = 0d0 - -! Calculation of the step x - -! Normal version -if (.not. absolute_eig) then - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo - -! Version to use the absolute value of the eigenvalues -else - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo - -endif - -double precision :: beta, norm_x - -! Test -! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) -! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first -! eigenvectors multiply by a constant to ensure the condition -! ||x(lambda=-e_val(1))|| = delta and escape the saddle point -if (avoid_saddle .and. e_val(1) < - thresh_eig) then - if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then - - ! norm of x - norm_x = dnrm2(n,x,1) - - ! Computes the coefficient for the w_1 - beta = delta**2 - norm_x**2 - - ! Updates the step x - x = x + W(:,1) * dsqrt(beta) - - ! Recomputes the norm to check - norm_x = dnrm2(n,x,1) - - print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' - print*, '||x||', norm_x - endif -endif - -! Transformation of x - -! x is a vector of size n, so it can be write as a m by m -! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". - - -! ! Step transformation vector -> matrix -! ! Vector with n element -> mo_num by mo_num matrix -! do j = 1, m -! do i = 1, m -! if (i>j) then -! call mat_to_vec_index(i,j,k) -! m_x(i,j) = x(k) -! else -! m_x(i,j) = 0d0 -! endif -! enddo -! enddo -! -! ! Antisymmetrization of the previous matrix -! do j = 1, m -! do i = 1, m -! if (i 0$ ($\lambda = 0$ is the unconstraint solution). But the -Newton method is only defined for a positive definite hessian matrix, -so $(\textbf{H} + \textbf{I} \lambda)$ must be positive -definite. Consequently, in the case where $\textbf{H}$ is not positive -definite, to ensure the positive definiteness, $\lambda$ must be -greater than $- h_1$. -\begin{align*} - \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 -\end{align*} - -From that there are five cases: -- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ -- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot - \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} - \lambda)$ - must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ -- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot - \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing - $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be - positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) -- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot - \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing - $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be - positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is - similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = - 0)|| \leq \Delta$ - but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ - time a constant to ensure the condition $||\textbf{x}(\lambda = - -h_1)|| = \Delta$ and escape from the saddle point - -Thus to find the solution, we can write: -\begin{align*} - ||\textbf{x}(\lambda)|| = \Delta -\end{align*} -\begin{align*} - ||\textbf{x}(\lambda)|| - \Delta = 0 -\end{align*} - -Taking the square of this equation -\begin{align*} - (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 -\end{align*} -we have a function with one minimum for the optimal $\lambda$. -Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve -\begin{align*} - (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -\end{align*} - -But in practice, it is more effective to solve: -\begin{align*} - (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -\end{align*} - -To do that, we just use the Newton method with "trust_newton" using -first and second derivative of $(||\textbf{x}(\lambda)||^2 - -\Delta^2)^2$ with respect to $\textbf{x}$. -This will give the optimal $\lambda$ to compute the -solution $\textbf{x}$ with the formula seen previously: -\begin{align*} - \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot - \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -\end{align*} - -The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our -step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Evolution of the trust region - -We initialize the trust region at the first iteration using a radius -\begin{align*} - \Delta = ||\textbf{x}(\lambda=0)|| -\end{align*} - -And for the next iteration the trust region will evolves depending of -the agreement of the energy prediction based on the Taylor series -truncated at the 2nd order and the real energy. If the Taylor series -truncated at the 2nd order represents correctly the energy landscape -the trust region will be extent else it will be reduced. In order to -mesure this agreement we use the ratio rho cf. "rho_model" and -"trust_e_model". From that we use the following values: -- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, -- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, -- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, -- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. - -In addition, if $\rho < 0.1$ the iteration is cancelled, so it -restarts with a smaller trust region until the energy decreases. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Summary - -To summarize, knowing the hessian (eigenvectors and eigenvalues), the -gradient and the radius of the trust region we can compute the norm of -the Newton step -\begin{align*} - ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n - \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 -\end{align*} - -- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and - $\textbf{x}(\lambda=0)$ is in the trust region and it is not - necessary to put a constraint on $\textbf{x}$, the solution is the - unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. -- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and - $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in - the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = - -h_1)$, similarly to the previous case. - But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ - time a constant to ensure the condition $||\textbf{x}(\lambda = - -h_1)|| = \Delta$ and escape from the saddle point -- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we - have to search $\lambda \in (-h_1, \infty)$ such as - $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method - \begin{align*} - (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 - \end{align*} - or - \begin{align*} - (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 - \end{align*} - which is numerically more stable. And finally compute - \begin{align*} - \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot - \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i - \end{align*} -- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we - do exactly the same thing that the previous case but we search - $\lambda \in (0, \infty)$ -- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and - $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the - sum), again we do exactly the same thing that the previous case - searching $\lambda \in (-h_1, \infty)$. - - -For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not -necessary in fact to remove the $j = 1$ in the sum since the term -where $h_i - \lambda < 10^{-6}$ are not computed. - -After that, we take this vector $\textbf{x}^*$, called "x", and we do -the transformation to an antisymmetric matrix $\textbf{X}$, called -m_x. This matrix $\textbf{X}$ will be used to compute a rotation -matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". - -NB: -An improvement can be done using a elleptical trust region. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Code - -Provided: -| mo_num | integer | number of MOs | - -Cf. qp_edit in orbital optimization section, for some constants/thresholds - -Input: -| m | integer | number of MOs | -| n | integer | m*(m-1)/2 | -| H(n, n) | double precision | hessian | -| v_grad(n) | double precision | gradient | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n, n) | double precision | eigenvectors of the hessian | -| rho | double precision | agreement between the model and the reality, | -| | | represents the quality of the energy prediction | -| nb_iter | integer | number of iteration | - -Input/Ouput: -| delta | double precision | radius of the trust region | - -Output: -| x(n) | double precision | vector containing the step | - -Internal: -| accu | double precision | temporary variable to compute the step | -| lambda | double precision | lagrange multiplier | -| trust_radius2 | double precision | square of the radius of the trust region | -| norm2_x | double precision | norm^2 of the vector x | -| norm2_g | double precision | norm^2 of the vector containing the gradient | -| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | -| i, j, k | integer | indexes | - -Function: -| dnrm2 | double precision | Blas function computing the norm | -| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compuet the step in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n), rho - integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) - - ! inout - double precision, intent(inout) :: delta - - ! out - double precision, intent(out) :: x(n) - - ! Internal - double precision :: accu, lambda, trust_radius2 - double precision :: norm2_x, norm2_g - double precision, allocatable :: tmp_wtg(:) - integer :: i,j,k - double precision :: t1,t2,t3 - integer :: n_neg_eval - - - ! Functions - double precision :: ddot, dnrm2 - double precision :: f_norm_trust_region_omp - - print*,'' - print*,'==================' - print*,'---Trust_region---' - print*,'==================' - - call wall_time(t1) - - ! Allocation - allocate(tmp_wtg(n)) -#+END_SRC - - -*** Initialization and norm - -The norm of the step size will be useful for the trust region -algorithm. We start from a first guess and the radius of the trust -region will evolve during the optimization. - -avoid_saddle is actually a test to avoid saddle points - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Initialization of the Lagrange multiplier - lambda = 0d0 - - ! List of w^T.g, to avoid the recomputation - tmp_wtg = 0d0 - do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) - enddo - enddo - - ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue - ! in the case of avoid_saddle - if (avoid_saddle .and. e_val(1) < - thresh_eig) then - i = 2 - ! Number of negative eigenvalues - do while (e_val(i) < - thresh_eig) - if (tmp_wtg(i) < thresh_wtg2) then - if (version_avoid_saddle == 1) then - tmp_wtg(i) = 1d0 - elseif (version_avoid_saddle == 2) then - tmp_wtg(i) = DABS(e_val(i)) - elseif (version_avoid_saddle == 3) then - tmp_wtg(i) = dsqrt(DABS(e_val(i))) - else - tmp_wtg(i) = thresh_wtg2 - endif - endif - i = i + 1 - enddo - - ! For the fist one it's a little bit different - if (tmp_wtg(1) < thresh_wtg2) then - tmp_wtg(1) = 0d0 - endif - - endif - - ! Norm^2 of x, ||x||^2 - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta - ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm - ! Anyway if the step is too big it will be reduced - print*,'||x||^2 :', norm2_x - - ! Norm^2 of the gradient, ||v_grad||^2 - norm2_g = (dnrm2(n,v_grad,1))**2 - print*,'||grad||^2 :', norm2_g -#+END_SRC - -*** Trust radius initialization - - At the first iteration (nb_iter = 0) we initialize the trust region - with the norm of the step generate by the Newton's method ($\textbf{x}_1 = - (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, - we compute this norm using f_norm_trust_region_omp as explain just - below) - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! trust radius - if (nb_iter == 0) then - trust_radius2 = norm2_x - ! To avoid infinite loop of cancellation of this first step - ! without changing delta - nb_iter = 1 - - ! Compute delta, delta = sqrt(trust_radius) - delta = dsqrt(trust_radius2) - endif -#+END_SRC - -*** Modification of the trust radius - -In function of rho (which represents the agreement between the model -and the reality, cf. rho_model) the trust region evolves. We update -delta (the radius of the trust region). - -To avoid too big trust region we put a maximum size. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Modification of the trust radius in function of rho - if (rho >= 0.75d0) then - delta = 2d0 * delta - elseif (rho >= 0.5d0) then - delta = delta - elseif (rho >= 0.25d0) then - delta = 0.5d0 * delta - else - delta = 0.25d0 * delta - endif - - ! Maximum size of the trust region - !if (delta > 0.5d0 * n * pi) then - ! delta = 0.5d0 * n * pi - ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' - !endif - - if (delta > 1d10) then - delta = 1d10 - endif - - print*, 'Delta :', delta -#+END_SRC - -*** Calculation of the optimal lambda - -We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ -- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant - $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ -- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the - unconstrained one, $\lambda = 0$ - -You will find more details at the beginning - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 - ! and not (||x||^2 - delta)^2 = 0 - - ! Research of lambda to solve ||x(lambda)|| = Delta - - ! Display - print*, 'e_val(1) = ', e_val(1) - print*, 'w_1^T.g =', tmp_wtg(1) - - ! H positive definite - if (e_val(1) > - thresh_eig) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta - - ! H positive definite, ||x(lambda = 0)|| <= Delta - if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' - lambda = 0d0 - - ! H positive definite, ||x(lambda = 0)|| > Delta - else - ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - - ! H indefinite - else - if (DABS(tmp_wtg(1)) < thresh_wtg) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) - endif - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta - if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then - ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' - lambda = - e_val(1) - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta - ! and - ! H indefinite, w_1^T.g =/= 0 - else - ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - - endif - - ! Recomputation of the norm^2 of the step x - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - print*,'' - print*,'Summary after the trust region:' - print*,'lambda:', lambda - print*,'||x||:', dsqrt(norm2_x) - print*,'delta:', delta -#+END_SRC - -*** Calculation of the step x - -x refers to $\textbf{x}^*$ -We compute x in function of lambda using its formula : -\begin{align*} -\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i -+ \lambda} \cdot \textbf{w}_i -\end{align*} - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Initialisation - x = 0d0 - - ! Calculation of the step x - - ! Normal version - if (.not. absolute_eig) then - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo - - ! Version to use the absolute value of the eigenvalues - else - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo - - endif - - double precision :: beta, norm_x - - ! Test - ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) - ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first - ! eigenvectors multiply by a constant to ensure the condition - ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point - if (avoid_saddle .and. e_val(1) < - thresh_eig) then - if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then - - ! norm of x - norm_x = dnrm2(n,x,1) - - ! Computes the coefficient for the w_1 - beta = delta**2 - norm_x**2 - - ! Updates the step x - x = x + W(:,1) * dsqrt(beta) - - ! Recomputes the norm to check - norm_x = dnrm2(n,x,1) - - print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' - print*, '||x||', norm_x - endif - endif -#+END_SRC - -*** Transformation of x - -x is a vector of size n, so it can be write as a m by m -antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". - - #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -! ! Step transformation vector -> matrix -! ! Vector with n element -> mo_num by mo_num matrix -! do j = 1, m -! do i = 1, m -! if (i>j) then -! call mat_to_vec_index(i,j,k) -! m_x(i,j) = x(k) -! else -! m_x(i,j) = 0d0 -! endif -! enddo -! enddo -! -! ! Antisymmetrization of the previous matrix -! do j = 1, m -! do i = 1, m -! if (i lower diagonal matrix (p,q), p > q - -! If a matrix is antisymmetric it can be reshaped as a vector. And the -! vector can be reshaped as an antisymmetric matrix - -! \begin{align*} -! \begin{pmatrix} -! 0 & -1 & -2 & -4 \\ -! 1 & 0 & -3 & -5 \\ -! 2 & 3 & 0 & -6 \\ -! 4 & 5 & 6 & 0 -! \end{pmatrix} -! \Leftrightarrow -! \begin{pmatrix} -! 1 & 2 & 3 & 4 & 5 & 6 -! \end{pmatrix} -! \end{align*} - -! !!! Here the algorithm only work for the lower diagonal !!! - -! Input: -! | i | integer | index in the vector | - -! Ouput: -! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | -! | | | p > q, | -! | | | p -> row, | -! | | | q -> column | - - -subroutine vec_to_mat_index(i,p,q) - - include 'pi.h' - - BEGIN_DOC - ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing - ! its index i a vector - END_DOC - - implicit none - - ! Variables - - ! in - integer,intent(in) :: i - - ! out - integer, intent(out) :: p,q - - ! internal - integer :: a,b - double precision :: da - - da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) - a = INT(da) - if ((a*(a-1))/2==i) then - p = a-1 - else - p = a - endif - b = p*(p-1)/2 - - ! Matrix element indexes - p = p + 1 - q = i - b - -end subroutine diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org deleted file mode 100644 index 0a09fa86..00000000 --- a/src/utils_trust_region/vec_to_mat_index.org +++ /dev/null @@ -1,72 +0,0 @@ -* Vector to matrix indexes - -*Compute the indexes p,q of a matrix element with the vector index i* - -Vector (i) -> lower diagonal matrix (p,q), p > q - -If a matrix is antisymmetric it can be reshaped as a vector. And the -vector can be reshaped as an antisymmetric matrix - -\begin{align*} -\begin{pmatrix} -0 & -1 & -2 & -4 \\ -1 & 0 & -3 & -5 \\ -2 & 3 & 0 & -6 \\ -4 & 5 & 6 & 0 -\end{pmatrix} -\Leftrightarrow -\begin{pmatrix} -1 & 2 & 3 & 4 & 5 & 6 -\end{pmatrix} -\end{align*} - -!!! Here the algorithm only work for the lower diagonal !!! - -Input: -| i | integer | index in the vector | - -Ouput: -| p,q | integer | corresponding indexes in the lower diagonal of a matrix | -| | | p > q, | -| | | p -> row, | -| | | q -> column | - -#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f -subroutine vec_to_mat_index(i,p,q) - - include 'pi.h' - - BEGIN_DOC - ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing - ! its index i a vector - END_DOC - - implicit none - - ! Variables - - ! in - integer,intent(in) :: i - - ! out - integer, intent(out) :: p,q - - ! internal - integer :: a,b - double precision :: da - - da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) - a = INT(da) - if ((a*(a-1))/2==i) then - p = a-1 - else - p = a - endif - b = p*(p-1)/2 - - ! Matrix element indexes - p = p + 1 - q = i - b - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f deleted file mode 100644 index 9140b8d3..00000000 --- a/src/utils_trust_region/vec_to_mat_v2.irp.f +++ /dev/null @@ -1,39 +0,0 @@ -! Vect to antisymmetric matrix using mat_to_vec_index - -! Vector to antisymmetric matrix transformation using mat_to_vec_index -! subroutine. - -! Can be done in OMP (for the first part and with omp critical for the second) - - -subroutine vec_to_mat_v2(n,m,v_x,m_x) - - BEGIN_DOC - ! Vector to antisymmetric matrix - END_DOC - - implicit none - - integer, intent(in) :: n,m - double precision, intent(in) :: v_x(n) - double precision, intent(out) :: m_x(m,m) - - integer :: i,j,k - - ! 1D -> 2D lower diagonal - m_x = 0d0 - do j = 1, m - 1 - do i = j + 1, m - call mat_to_vec_index(i,j,k) - m_x(i,j) = v_x(k) - enddo - enddo - - ! Antisym - do i = 1, m - 1 - do j = i + 1, m - m_x(i,j) = - m_x(j,i) - enddo - enddo - -end diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org deleted file mode 100644 index 4e358a88..00000000 --- a/src/utils_trust_region/vec_to_mat_v2.org +++ /dev/null @@ -1,40 +0,0 @@ -* Vect to antisymmetric matrix using mat_to_vec_index - -Vector to antisymmetric matrix transformation using mat_to_vec_index -subroutine. - -Can be done in OMP (for the first part and with omp critical for the second) - -#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f -subroutine vec_to_mat_v2(n,m,v_x,m_x) - - BEGIN_DOC - ! Vector to antisymmetric matrix - END_DOC - - implicit none - - integer, intent(in) :: n,m - double precision, intent(in) :: v_x(n) - double precision, intent(out) :: m_x(m,m) - - integer :: i,j,k - - ! 1D -> 2D lower diagonal - m_x = 0d0 - do j = 1, m - 1 - do i = j + 1, m - call mat_to_vec_index(i,j,k) - m_x(i,j) = v_x(k) - enddo - enddo - - ! Antisym - do i = 1, m - 1 - do j = i + 1, m - m_x(i,j) = - m_x(j,i) - enddo - enddo - -end -#+END_SRC From c687569bf488f6c5e93f7c56c2f415a4b22686be Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:01:25 +0200 Subject: [PATCH 05/29] create utils trust region --- src/utils_trust_region/EZFIO.cfg | 89 + src/utils_trust_region/NEED | 1 + src/utils_trust_region/README.md | 11 + src/utils_trust_region/algo_trust.irp.f | 248 +++ .../apply_mo_rotation.irp.f | 85 + src/utils_trust_region/mat_to_vec_index.irp.f | 61 + src/utils_trust_region/org/TANGLE_org_mode.sh | 7 + src/utils_trust_region/org/algo_trust.org | 593 ++++++ .../org/apply_mo_rotation.org | 86 + .../org/mat_to_vec_index.org | 63 + .../org/rotation_matrix.org | 452 +++++ .../org/rotation_matrix_iterative.org | 136 ++ .../org/sub_to_full_rotation_matrix.org | 65 + .../org/trust_region_expected_e.org | 128 ++ .../org/trust_region_optimal_lambda.org | 1661 +++++++++++++++++ .../org/trust_region_rho.org | 122 ++ .../org/trust_region_step.org | 759 ++++++++ .../org/vec_to_mat_index.org | 72 + src/utils_trust_region/org/vec_to_mat_v2.org | 40 + src/utils_trust_region/pi.h | 2 + src/utils_trust_region/rotation_matrix.irp.f | 441 +++++ .../rotation_matrix_iterative.irp.f | 134 ++ .../sub_to_full_rotation_matrix.irp.f | 64 + .../trust_region_expected_e.irp.f | 126 ++ .../trust_region_optimal_lambda.irp.f | 1651 ++++++++++++++++ src/utils_trust_region/trust_region_rho.irp.f | 120 ++ .../trust_region_step.irp.f | 749 ++++++++ src/utils_trust_region/vec_to_mat_index.irp.f | 71 + src/utils_trust_region/vec_to_mat_v2.irp.f | 39 + 29 files changed, 8076 insertions(+) create mode 100644 src/utils_trust_region/EZFIO.cfg create mode 100644 src/utils_trust_region/NEED create mode 100644 src/utils_trust_region/README.md create mode 100644 src/utils_trust_region/algo_trust.irp.f create mode 100644 src/utils_trust_region/apply_mo_rotation.irp.f create mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f create mode 100755 src/utils_trust_region/org/TANGLE_org_mode.sh create mode 100644 src/utils_trust_region/org/algo_trust.org create mode 100644 src/utils_trust_region/org/apply_mo_rotation.org create mode 100644 src/utils_trust_region/org/mat_to_vec_index.org create mode 100644 src/utils_trust_region/org/rotation_matrix.org create mode 100644 src/utils_trust_region/org/rotation_matrix_iterative.org create mode 100644 src/utils_trust_region/org/sub_to_full_rotation_matrix.org create mode 100644 src/utils_trust_region/org/trust_region_expected_e.org create mode 100644 src/utils_trust_region/org/trust_region_optimal_lambda.org create mode 100644 src/utils_trust_region/org/trust_region_rho.org create mode 100644 src/utils_trust_region/org/trust_region_step.org create mode 100644 src/utils_trust_region/org/vec_to_mat_index.org create mode 100644 src/utils_trust_region/org/vec_to_mat_v2.org create mode 100644 src/utils_trust_region/pi.h create mode 100644 src/utils_trust_region/rotation_matrix.irp.f create mode 100644 src/utils_trust_region/rotation_matrix_iterative.irp.f create mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f create mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f create mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f create mode 100644 src/utils_trust_region/trust_region_rho.irp.f create mode 100644 src/utils_trust_region/trust_region_step.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg new file mode 100644 index 00000000..9c9f6248 --- /dev/null +++ b/src/utils_trust_region/EZFIO.cfg @@ -0,0 +1,89 @@ +[thresh_delta] +type: double precision +doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta +interface: ezfio,provider,ocaml +default: 1.e-10 + +[thresh_rho] +type: double precision +doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_eig] +type: double precision +doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[thresh_model] +type: double precision +doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[absolute_eig] +type: logical +doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) +interface: ezfio,provider,ocaml +default: false + +[thresh_wtg] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_wtg2] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day +interface: ezfio,provider,ocaml +default: 1.e-6 + +[avoid_saddle] +type: logical +doc: Test to avoid saddle point, active if true +interface: ezfio,provider,ocaml +default: false + +[version_avoid_saddle] +type: integer +doc: cf. trust region, not stable +interface: ezfio,provider,ocaml +default: 3 + +[thresh_rho_2] +type: double precision +doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_cc] +type: double precision +doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_model_2] +type: double precision +doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning +interface: ezfio,provider,ocaml +default: 1.e-12 + +[version_lambda_search] +type: integer +doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 +interface: ezfio,provider,ocaml +default: 2 + +[nb_it_max_lambda] +type: integer +doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 100 + +[nb_it_max_pre_search] +type: integer +doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED new file mode 100644 index 00000000..1a65ce38 --- /dev/null +++ b/src/utils_trust_region/NEED @@ -0,0 +1 @@ +hartree_fock diff --git a/src/utils_trust_region/README.md b/src/utils_trust_region/README.md new file mode 100644 index 00000000..72bfefef --- /dev/null +++ b/src/utils_trust_region/README.md @@ -0,0 +1,11 @@ +# Utils trust region + +The documentation can be found in the org files. + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f new file mode 100644 index 00000000..933d8eff --- /dev/null +++ b/src/utils_trust_region/algo_trust.irp.f @@ -0,0 +1,248 @@ +! Algorithm for the trust region + +! step_in_trust_region: +! Computes the step in the trust region (delta) +! (automatically sets at the iteration 0 and which evolves during the +! process in function of the evolution of rho). The step is computing by +! constraining its norm with a lagrange multiplier. +! Since the calculation of the step is based on the Newton method, an +! estimation of the gain in energy is given using the Taylors series +! truncated at the second order (criterion_model). +! If (DABS(criterion-criterion_model) < 1d-12) then +! must_exit = .True. +! else +! must_exit = .False. + +! This estimation of the gain in energy is used by +! is_step_cancel_trust_region to say if the step is accepted or cancelled. + +! If the step must be cancelled, the calculation restart from the same +! hessian and gradient and recomputes the step but in a smaller trust +! region and so on until the step is accepted. If the step is accepted +! the hessian and the gradient are recomputed to produce a new step. + +! Example: + + +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo + + + +! Variables: + +! Input: +! | n | integer | m*(m-1)/2 | +! | m | integer | number of mo in the mo_class | +! | H(n,n) | double precision | Hessian | +! | v_grad(n) | double precision | Gradient | +! | W(n,n) | double precision | Eigenvectors of the hessian | +! | e_val(n) | double precision | Eigenvalues of the hessian | +! | criterion | double precision | Actual criterion | +! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +! | rho | double precision | Given by is_step_cancel_trus_region | +! | | | Agreement between the real function and the Taylor series (2nd order) | +! | nb_iter | integer | Actual number of iterations | + +! Input/output: +! | delta | double precision | Radius of the trust region | + +! Output: +! | criterion_model | double precision | Predicted criterion after the rotation | +! | x(n) | double precision | Step | +! | must_exit | logical | If the program must exit the loop | + + +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine + + + +! Variables: + +! Input: +! | nb_iter | integer | actual number of iterations | +! | prev_criterion | double precision | criterion before the application of the step x | +! | criterion | double precision | criterion after the application of the step x | +! | criterion_model | double precision | predicted criterion after the application of x | + +! Output: +! | rho | double precision | Agreement between the predicted criterion and the real new criterion | +! | cancel_step | logical | If the step must be cancelled | + + +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f new file mode 100644 index 00000000..a313769d --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -0,0 +1,85 @@ +! Apply MO rotation +! Subroutine to apply the rotation matrix to the coefficients of the +! MOs. + +! New MOs = Old MOs . Rotation matrix + +! *Compute the new MOs with the previous MOs and a rotation matrix* + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +! Intent in: +! | R(mo_num,mo_num) | double precision | rotation matrix | + +! Intent out: +! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +! Internal: +! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +! | i,j | integer | indexes | + +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f new file mode 100644 index 00000000..35e12232 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.irp.f @@ -0,0 +1,61 @@ +! Matrix to vector index + +! *Compute the index i of a vector element from the indexes p,q of a +! matrix element* + +! Lower diagonal matrix (p,q), p > q -> vector (i) + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | p,q | integer | indexes of a matrix element in the lower diagonal | +! | | | p > q, q -> column | +! | | | p -> row, | +! | | | q -> column | + +! Input: +! | i | integer | corresponding index in the vector | + + +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine diff --git a/src/utils_trust_region/org/TANGLE_org_mode.sh b/src/utils_trust_region/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/org/algo_trust.org b/src/utils_trust_region/org/algo_trust.org new file mode 100644 index 00000000..01e99c29 --- /dev/null +++ b/src/utils_trust_region/org/algo_trust.org @@ -0,0 +1,593 @@ +* Algorithm for the trust region + +step_in_trust_region: +Computes the step in the trust region (delta) +(automatically sets at the iteration 0 and which evolves during the +process in function of the evolution of rho). The step is computing by +constraining its norm with a lagrange multiplier. +Since the calculation of the step is based on the Newton method, an +estimation of the gain in energy is given using the Taylors series +truncated at the second order (criterion_model). +If (DABS(criterion-criterion_model) < 1d-12) then + must_exit = .True. +else + must_exit = .False. + +This estimation of the gain in energy is used by +is_step_cancel_trust_region to say if the step is accepted or cancelled. + +If the step must be cancelled, the calculation restart from the same +hessian and gradient and recomputes the step but in a smaller trust +region and so on until the step is accepted. If the step is accepted +the hessian and the gradient are recomputed to produce a new step. + +Example: + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo +#+END_SRC + +Variables: + +Input: +| n | integer | m*(m-1)/2 | +| m | integer | number of mo in the mo_class | +| H(n,n) | double precision | Hessian | +| v_grad(n) | double precision | Gradient | +| W(n,n) | double precision | Eigenvectors of the hessian | +| e_val(n) | double precision | Eigenvalues of the hessian | +| criterion | double precision | Actual criterion | +| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +| rho | double precision | Given by is_step_cancel_trus_region | +| | | Agreement between the real function and the Taylor series (2nd order) | +| nb_iter | integer | Actual number of iterations | + +Input/output: +| delta | double precision | Radius of the trust region | + +Output: +| criterion_model | double precision | Predicted criterion after the rotation | +| x(n) | double precision | Step | +| must_exit | logical | If the program must exit the loop | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine +#+END_SRC + +Variables: + +Input: +| nb_iter | integer | actual number of iterations | +| prev_criterion | double precision | criterion before the application of the step x | +| criterion | double precision | criterion after the application of the step x | +| criterion_model | double precision | predicted criterion after the application of x | + +Output: +| rho | double precision | Agreement between the predicted criterion and the real new criterion | +| cancel_step | logical | If the step must be cancelled | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine +#+END_SRC + +** Template for MOs +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt +subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) + double precision, allocatable :: prev_mos(:,:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation + integer :: nb_iter, info, nb_sub_iter + integer :: i,j,tmp_i,tmp_j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Provide the criterion, but unnecessary because it's done + ! automatically + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! The new hessian and gradient are computed at the end of the previous iteration + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Forces the step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! touch mo_coef + call clear_mo_map ! Only if you are using the bi-electronic integrals + ! mo_coef becomes valid + ! And avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! To enforce the program to provide new criterion after the update + ! of the parameters + FREE C_PROVIDER + PROVIDE C_PROVIDER + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step ? + if (cancel_step) then + ! Replacement by the previous MOs + mo_coef = prev_mos + ! call save_mos() ! depends of the time for 1 iteration + + ! No need to clear_mo_map since we don't recompute the gradient and the hessian + ! mo_coef becomes valid + ! Avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + else + ! The step is accepted: + ! criterion -> prev criterion + + ! The replacement "criterion -> prev criterion" is already done + ! in trust_region_rho, so if the criterion does not have a reason + ! to change, it will change nothing for the criterion and will + ! force the program to provide the new hessian, gradient and + ! convergence criterion for the next iteration. + ! But in the case of orbital optimization we diagonalize the CI + ! matrix after the "FREE" statement, so the criterion will change + + FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + prev_criterion = C_PROVIDER + + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! call save_mos() ! depends of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! Provide the convergence criterion + ! Provide the gradient and the hessian for the next iteration + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + ! Save the final MOs + call save_mos() + + ! Diagonalization of the hessian + ! (To see the eigenvalues at the end of the optimization) + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) + +end +#+END_SRC + +** Cartesian version +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt +subroutine algo_trust_cartesian_template(tmp_n) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit + integer :: nb_iter, nb_sub_iter + integer :: i,j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) + + PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + if (nb_iter > 0) then + PROVIDE H_PROVIDER g_PROVIDER + endif + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! New coordinates, check the sign + X_PROVIDER = X_PROVIDER - tmp_x + + ! touch X_PROVIDER + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! New criterion + PROVIDE C_PROVIDER ! Unnecessary + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancel the previous step + if (cancel_step) then + ! Replacement by the previous coordinates, check the sign + X_PROVIDER = X_PROVIDER + tmp_x + + ! Avoid the recomputation of the hessian and the gradient + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + deallocate(e_val, W, tmp_x) + +end +#+END_SRC + +** Script template +#+BEGIN_SRC bash :tangle script_template_mos.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + +#+BEGIN_SRC bash :tangle script_template_xyz.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_X_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file +sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + diff --git a/src/utils_trust_region/org/apply_mo_rotation.org b/src/utils_trust_region/org/apply_mo_rotation.org new file mode 100644 index 00000000..955997e9 --- /dev/null +++ b/src/utils_trust_region/org/apply_mo_rotation.org @@ -0,0 +1,86 @@ +* Apply MO rotation +Subroutine to apply the rotation matrix to the coefficients of the +MOs. + +New MOs = Old MOs . Rotation matrix + +*Compute the new MOs with the previous MOs and a rotation matrix* + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +Intent in: +| R(mo_num,mo_num) | double precision | rotation matrix | + +Intent out: +| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +Internal: +| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +| i,j | integer | indexes | +#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/mat_to_vec_index.org b/src/utils_trust_region/org/mat_to_vec_index.org new file mode 100644 index 00000000..50840584 --- /dev/null +++ b/src/utils_trust_region/org/mat_to_vec_index.org @@ -0,0 +1,63 @@ +* Matrix to vector index + +*Compute the index i of a vector element from the indexes p,q of a +matrix element* + +Lower diagonal matrix (p,q), p > q -> vector (i) + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| p,q | integer | indexes of a matrix element in the lower diagonal | +| | | p > q, q -> column | +| | | p -> row, | +| | | q -> column | + +Input: +| i | integer | corresponding index in the vector | + +#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine +#+END_SRC + diff --git a/src/utils_trust_region/org/rotation_matrix.org b/src/utils_trust_region/org/rotation_matrix.org new file mode 100644 index 00000000..3b2ff437 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix.org @@ -0,0 +1,452 @@ +* Rotation matrix + +*Build a rotation matrix from an antisymmetric matrix* + +Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +$$ +\textbf{R}=\exp(\textbf{A}) +$$ + +So : +\begin{align*} +\textbf{R}=& \exp(\textbf{A}) \\ +=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +\end{align*} + +With : +$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +$\tau$ : $\sqrt{-x}$ +$x$ : eigenvalues of $\textbf{A}^2$ + +Input: +| A(n,n) | double precision | antisymmetric matrix | +| n | integer | number of columns of the A matrix | +| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +Output: +| R(n,n) | double precision | Rotation matrix | +| info | integer | if info = 0, the execution is successful | +| | | if info = k, the k-th parameter has an illegal value | +| | | if info = -k, the algorithm failed | + +Internal: +| B(n,n) | double precision | B = A.A | +| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +| W(n,n) | double precision | eigenvectors of B | +| e_val(n) | double precision | eigenvalues of B | +| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +| part_1(n,n) | double precision | matrix W.cos_tau.W^t | +| part_1a(n,n) | double precision | matrix cos_tau.W^t | +| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +| part_2a(n,n) | double precision | matrix W^t.A | +| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +| norm | integer | norm of R.R^t-1, must be equal to 0 | +| i,j | integer | indexes | + +Functions: +| dnrm2 | double precision | Lapack function, compute the norm of a matrix | +| disnan | logical | Lapack function, check if an element is NaN | + + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) +#+END_SRC + +** Pre-conditions +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Initialization + info=0 + enforce_step_cancellation = .False. + + ! Size of matrix A must be at least 1 by 1 + if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return + endif + + ! Leading dimension of A must be >= n + if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return + endif + + ! Leading dimension of A must be >= n + if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return + endif + + ! Matrix elements of A must by non-NaN + do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo + enddo + + do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif + enddo + + do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo + enddo + + ! Fix for too big elements ! bad idea better to cancel if the error is too big + !do j = 1, n + ! do i = 1, n + ! A(i,j) = mod(A(i,j),2d0*pi) + ! if (dabs(A(i,j)) > pi) then + ! A(i,j) = 0d0 + ! endif + ! enddo + !enddo + + max_elem_A = 0d0 + do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo + enddo + !print*,'max element in A', max_elem_A + + if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' + endif + +#+END_SRC + +** Calculations + +*** B=A.A + - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ + - Diagonalization of $\textbf{B}$ + W, the eigenvectors + e_val, the eigenvalues + + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Compute B=A.A + + call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + + ! Copy B in W, diagonalization will put the eigenvectors in W + W=B + + ! Diagonalization of B + ! Eigenvalues -> e_val + ! Eigenvectors -> W + lwork = 3*n-1 + allocate(work(lwork,n)) + + !print*,'Starting diagonalization ...' + + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + + deallocate(work) + + if (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' + elseif (info2 >0) then + print*, "WARNING: Diagonalization failed to converge" + endif + #+END_SRC + +*** Tau^-1, cos(tau), sin(tau) + $$\tau = \sqrt{-x}$$ + - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ + - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ + - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ + These matrices are diagonals + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Diagonal matrix m_diag + do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif + enddo + + m_diag = 0.d0 + do i = 1, n + m_diag(i,i) = e_val(i) + enddo + + ! cos_tau + do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo + enddo + + ! sin_tau + do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo + enddo + + ! Debug, display the cos_tau and sin_tau matrix + !if (debug) then + ! print*, 'cos_tau' + ! do i = 1, n + ! print*, cos_tau(i,:) + ! enddo + ! print*, 'sin_tau' + ! do i = 1, n + ! print*, sin_tau(i,:) + ! enddo + !endif + + ! tau^-1 + do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo + enddo + + max_elem = 0d0 + do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif + enddo + !print*,'max elem tau^-1:', max_elem + + ! Debug + !print*,'eigenvalues:' + !do i = 1, n + ! print*, e_val(i) + !enddo + + !Debug, display tau^-1 + !if (debug) then + ! print*, 'tau^-1' + ! do i = 1, n + ! print*,tau_m1(i,:) + ! enddo + !endif + #+END_SRC + +*** Rotation matrix + \begin{align*} + \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + \begin{align*} + \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \end{align*} + \begin{align*} + \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + + First: + part_1 = dgemm(W, dgemm(cos_tau, W^t)) + part_1a = dgemm(cos_tau, W^t) + part_1 = dgemm(W, part_1a) + And: + part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) + part_2a = dgemm(W^t, A) + part_2b = dgemm(sin_tau, part_2a) + part_2c = dgemm(tau_m1, part_2b) + part_2 = dgemm(W, part_2c) + Finally: + Rotation matrix, R = part_1+part_2 + + If $R$ is a rotation matrix: + $R.R^T=R^T.R=\textbf{1}$ + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! part_1 + call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + + ! part_2 + call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) + call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) + call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + + ! Rotation matrix R + R = part_1 + part_2 + + ! Matrix check + ! R.R^t and R^t.R must be equal to identity matrix + do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo + enddo + + call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + + norm = dnrm2(n*n,RR_t,1) + !print*, 'Rotation matrix check, norm R.R^T = ', norm + + ! Debug + !if (debug) then + ! print*, 'RR_t' + ! do i = 1, n + ! print*, RR_t(i,:) + ! enddo + !endif + #+END_SRC + +*** Post conditions + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Check if R.R^T=1 + max_elem = 0d0 + do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo + enddo + + print*, 'Max error in R.R^T:', max_elem + !print*, 'e_val(1):', e_val(1) + !print*, 'e_val(n):', e_val(n) + !print*, 'max elem in A:', max_elem_A + + if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. + endif + + ! Matrix elements of R must by non-NaN + do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo + enddo + + ! Display + !if (debug) then + ! print*,'Rotation matrix :' + ! do i = 1, n + ! write(*,'(100(F10.5))') R(i,:) + ! enddo + !endif + #+END_SRC + +** Deallocation, end + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine + #+END_SRC + diff --git a/src/utils_trust_region/org/rotation_matrix_iterative.org b/src/utils_trust_region/org/rotation_matrix_iterative.org new file mode 100644 index 00000000..f6cc9909 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix_iterative.org @@ -0,0 +1,136 @@ +* Rotation matrix with the iterative method + +\begin{align*} +\textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +\end{align*} + +!!! Doesn't work !!! + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end +#+END_SRC + +** Factorial +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/org/sub_to_full_rotation_matrix.org b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org new file mode 100644 index 00000000..f0cf0bfc --- /dev/null +++ b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org @@ -0,0 +1,65 @@ +* Rotation matrix in a subspace to rotation matrix in the full space + +Usually, we are using a list of MOs, for exemple the active ones. When +we compute a rotation matrix to rotate the MOs, we just compute a +rotation matrix for these MOs in order to reduce the size of the +matrix which has to be computed. Since the computation of a rotation +matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +reuce the number of MOs involved. +After that we replace the rotation matrix in the full space by +building the elements of the rotation matrix in the full space from +the elements of the rotation matrix in the subspace and adding some 0 +on the extradiagonal elements and some 1 on the diagonal elements, +for the MOs that are not involved in the rotation. + +Provided: +| mo_num | integer | Number of MOs | + +Input: +| m | integer | Size of tmp_list, m <= mo_num | +| tmp_list(m) | integer | List of MOs | +| tmp_R(m,m) | double precision | Rotation matrix in the space of | +| | | the MOs containing by tmp_list | + +Output: +| R(mo_num,mo_num | double precision | Rotation matrix in the space | +| | | of all the MOs | + +Internal: +| i,j | integer | indexes in the full space | +| tmp_i,tmp_j | integer | indexes in the subspace | + +#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_expected_e.org b/src/utils_trust_region/org/trust_region_expected_e.org new file mode 100644 index 00000000..9d2868fa --- /dev/null +++ b/src/utils_trust_region/org/trust_region_expected_e.org @@ -0,0 +1,128 @@ +* Predicted energy : e_model + +*Compute the energy predicted by the Taylor series* + +The energy is predicted using a Taylor expansion truncated at te 2nd +order : + +\begin{align*} +E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +\end{align*} + +Input: +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | + +Output: +| e_model | double precision | predicted energy after the rotation of the MOs | + +Internal: +| part_1 | double precision | v_grad^T.x | +| part_2 | double precision | 1/2 . x^T.H.x | +| part_2a | double precision | H.x | +| i,j | integer | indexes | + +Function: +| ddot | double precision | dot product (Lapack) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) +#+END_SRC + +** Calculations + +part_1 corresponds to the product g.x +part_2a corresponds to the product H.x +part_2 corresponds to the product 0.5*(x^T.H.x) + +TODO: remove the dot products + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f + ! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + ! print*,'g.x : ', part_1 + !endif + + ! Product H.x + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + ! print*,'1/2*x^T.H.x : ', part_2 + !endif + + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_optimal_lambda.org b/src/utils_trust_region/org/trust_region_optimal_lambda.org new file mode 100644 index 00000000..ff454cb6 --- /dev/null +++ b/src/utils_trust_region/org/trust_region_optimal_lambda.org @@ -0,0 +1,1661 @@ +* Newton's method to find the optimal lambda + +*Compute the lambda value for the trust region* + +This subroutine uses the Newton method in order to find the optimal +lambda. This constant is added on the diagonal of the hessian to shift +the eiganvalues. It has a double role: +- ensure that the resulting hessian is positive definite for the + Newton method +- constrain the step in the trust region, i.e., + $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius + of the trust region. +We search $\lambda$ which minimizes +\begin{align*} + f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +\end{align*} +or +\begin{align*} + \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +\end{align*} +and gives obviously 0 in both cases. \newline + +There are several cases: +- If $\textbf{H}$ is positive definite the interval containing the + solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$ then the interval containing + the solution is $\lambda \in (-h_1, \infty)$. +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} = 0$ then the interval containing the solution is + $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < + 10^{-12}$ are not computed, so the term where $i = 1$ is + automatically removed and this case becomes similar to the previous one. + +So to avoid numerical problems (cf. trust_region) we start the +algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +with $\epsilon$ a little constant. +The research must be restricted to the interval containing the +solution. For that reason a little trust region in 1D is used. + +The Newton method to find the optimal $\lambda$ is : +\begin{align*} + \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +\end{align*} +$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +$\lambda$ at the l-th iteration, +$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +$\lambda$ at the l-th iteration.\newline + +Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +\begin{align*} + y \leq \alpha +\end{align*} +with $\alpha$ a scalar representing the trust length (trust region in +1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +Taylor series truncated at the second order. Thus, if $y > \alpha$, +the constraint is applied as +\begin{align*} + y^* = \alpha \frac{y}{|y|} +\end{align*} +with $y^*$ the solution in the trust region. + +The size of the trust region evolves in function of $\rho$ as for the +trust region seen previously cf. trust_region, rho_model. +The prediction of the value of $f$ or $\tilde{f}$ is done using the +Taylor series truncated at the second order cf. "trust_region", +"trust_e_model". + +The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 + = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) + \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} +\begin{align*} +\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +1/\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} + {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +Provided in qp_edit: +| thresh_rho_2 | +| thresh_cc | +| nb_it_max_lambda | +| version_lambda_search | +| nb_it_max_pre_search | +see qp_edit for more details + +Input: +| n | integer | m*(m-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +| delta | double precision | delta for the trust region | + +Output: +| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +| | | lambda > 0 | + +Internal: +| d1_N | double precision | value of d1_norm_trust_region | +| d2_N | double precision | value of d2_norm_trust_region | +| f_N | double precision | value of f_norm_trust_region | +| prev_f_N | double precision | previous value of f_norm_trust_region | +| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +| prev_f_R | double precision | previous value of f_R | +| model | double precision | predicted value of f_R from prev_f_R and y | +| d_1 | double precision | value of the first derivative | +| d_2 | double precision | value of the second derivative | +| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +| prev_lambda | double precision | previous value of lambda | +| t1,t2,t3 | double precision | wall time | +| i | integer | index | +| epsilon | double precision | little constant to avoid numerical problem | +| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +| version | integer | version of the root finding method | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| f_norm_trust_region | double precision | value of norm(x)^2 | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif + ! Version 2 is normally better +#+END_SRC + +Resolution with the Newton method: + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f + ! Initialization + epsilon = 1d-4 + lambda = max(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + !print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + !print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + !print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + !write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif + + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + !print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + !print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'---End trust_newton---' + +end subroutine +#+END_SRC + +* OMP: First derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| mo_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function +#+END_SRC + +* OMP: Second derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +This function computes the second derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. +\begin{align*} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ + \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_accu2 | double precision | temporary array for the third sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function +#+END_SRC + +* OMP: Function value of ||x||^2 + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | + +Internal: +| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +| tmp_fN | double precision | temporary array for the function | +| i,j | integer | indexes | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function +#+END_SRC + +* First derivative of (||x||^2 - Delta^2)^2 +Version without omp + +*Function to compute the first derivative of ||x||^2 - Delta* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function +#+END_SRC + +* Second derivative of (||x||^2 - Delta^2)^2 +Version without OMP + +*Function to compute the second derivative of ||x||^2 - Delta* + + +\begin{equation} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{equation} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function +#+END_SRC + +* Function value of ||x||^2 +Version without OMP + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| f_norm_trust_region | double precision | value of norm(x)^2 | +| ddot | double precision | blas dot product | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function +#+END_SRC + +* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end +#+END_SRC + +* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end +#+END_SRC + +* First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end +#+END_SRC + +* Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_rho.org b/src/utils_trust_region/org/trust_region_rho.org new file mode 100644 index 00000000..b669da8c --- /dev/null +++ b/src/utils_trust_region/org/trust_region_rho.org @@ -0,0 +1,122 @@ +* Agreement with the model: Rho + +*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +Rho represents the agreement between the model (the predicted energy +by the Taylor expansion truncated at the 2nd order) and the real +energy : + +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} +With : +$E^{k}$ the energy at the previous iteration +$E^{k+1}$ the energy at the actual iteration +$m^{k+1}$ the predicted energy for the actual iteration +(cf. trust_e_model) + +If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +If $\rho \leq 0$ the previous energy is lower than the actual +energy. We have to cancel the last step and use a smaller trust +region. +Here we cancel the last step if $\rho < 0.1$, because even if +the energy decreases, the agreement is bad, i.e., the Taylor expansion +truncated at the second order doesn't represent correctly the energy +landscape. So it's better to cancel the step and restart with a +smaller trust region. + +Provided in qp_edit: +| thresh_rho | + +Input: +| prev_energy | double precision | previous energy (energy before the rotation) | +| e_model | double precision | predicted energy after the rotation | + +Output: +| rho | double precision | the agreement between the model (predicted) and the real energy | +| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +| | | else the previous energy doesn't change | + +Internal: +| energy | double precision | energy (real) after the rotation | +| i | integer | index | +| t* | double precision | time | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + !call wall_time(t1) +#+END_SRC + +** Rho +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} + +In function of $\rho$ th step can be accepted or cancelled. + +If we cancel the last step (k+1), the previous energy (k) doesn't +change! +If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f + ! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + !print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy + endif + + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_step.org b/src/utils_trust_region/org/trust_region_step.org new file mode 100644 index 00000000..0e5f090f --- /dev/null +++ b/src/utils_trust_region/org/trust_region_step.org @@ -0,0 +1,759 @@ +* Trust region + +*Compute the next step with the trust region algorithm* + +The Newton method is an iterative method to find a minimum of a given +function. It uses a Taylor series truncated at the second order of the +targeted function and gives its minimizer. The minimizer is taken as +the new position and the same thing is done. And by doing so +iteratively the method find a minimum, a local or global one depending +of the starting point and the convexity/nonconvexity of the targeted +function. + +The goal of the trust region is to constrain the step size of the +Newton method in a certain area around the actual position, where the +Taylor series is a good approximation of the targeted function. This +area is called the "trust region". + +In addition, in function of the agreement between the Taylor +development of the energy and the real energy, the size of the trust +region will be updated at each iteration. By doing so, the step sizes +are not too larges. In addition, since we add a criterion to cancel the +step if the energy increases (more precisely if rho < 0.1), so it's +impossible to diverge. \newline + +References: \newline +Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +ISBN: 978-0-387-40065-5 \newline + +By using the first and the second derivatives, the Newton method gives +a step: +\begin{align*} + \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot + \textbf{g}_{(k)} +\end{align*} +which leads to the minimizer of the Taylor series. +!!! Warning: the Newton method gives the minimizer if and only if +$\textbf{H}$ is positive definite, else it leads to a saddle point !!! +But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +\begin{align*} + ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +\end{align*} +which is equivalent to +\begin{align*} + \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +\end{align*} + +with: \newline +$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +size n) \newline +$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +matrix) \newline +$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +size n) \newline +$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +\newline + +Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +hypersphere of radius $\Delta_{(k+1)}$.\newline + +So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +$\textbf{H}$ is positive definite, the +solution is the step given by the Newton method +$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +Else we have to constrain the step size. For simplicity we will remove +the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +Starting from the Taylor series of a function E (here, the energy) +truncated at the 2nd order, we have: +\begin{align*} + E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} + \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \mathcal{O}(\textbf{x}^2) +\end{align*} + +With the constraint on the norm of $\textbf{x}$ we can write the +Lagrangian +\begin{align*} + \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} + + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +\end{align*} +Where: \newline +$\lambda$ is the Lagrange multiplier \newline +$E$ is the energy at the k-th iteration $\Leftrightarrow +E(\textbf{x} = \textbf{0})$ \newline + +To solve this equation, we search a stationary point where the first +derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +\end{align*} + +The derivative is: +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} + = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +\end{align*} + +So, we search $\textbf{x}$ such as: +\begin{align*} +\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} += \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +\end{align*} + +We can rewrite that as: +\begin{align*} + \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} + = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +\end{align*} +with $\textbf{I}$ is the identity matrix. + +By doing so, the solution is: +\begin{align*} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +\end{align*} +\begin{align*} + \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} +with $\textbf{x}^T \textbf{x} = \Delta^2$. + +We have to solve this previous equation to find this $\textbf{x}$ in the +trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +just a one dimension problem because we can express $\textbf{x}$ as a +function of $\lambda$: +\begin{align*} + \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} + +We start from the fact that the hessian is diagonalizable. So we have: +\begin{align*} + \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +\end{align*} +with: \newline +$\textbf{H}$, the hessian matrix \newline +$\textbf{W}$, the matrix containing the eigenvectors \newline +$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +$h_i$, the i-th eigenvalue in ascending order \newline + +Now we use the fact that adding a constant on the diagonal just shifts +the eigenvalues: +\begin{align*} + \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} + +\textbf{I} \lambda) \cdot \textbf{W}^T +\end{align*} + +By doing so we can express $\textbf{x}$ as a function of $\lambda$ +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} +with $\lambda \neq - h_i$. + +An interesting thing in our case is the norm of $\textbf{x}$, +because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +\begin{align*} + ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot + \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +And if we study the properties of this function we see that: +\begin{align*} + \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +\end{align*} +and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +\begin{align*} + \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +\end{align*} + +From these limits and knowing that $h_1$ is the lowest eigenvalue, we +can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +strictly decreasing function on the interval $\lambda \in +(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +solution. + +Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +Newton method is only defined for a positive definite hessian matrix, +so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +definite. Consequently, in the case where $\textbf{H}$ is not positive +definite, to ensure the positive definiteness, $\lambda$ must be +greater than $- h_1$. +\begin{align*} + \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +\end{align*} + +From that there are five cases: +- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} + \lambda)$ + must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is + similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = + 0)|| \leq \Delta$ + but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point + +Thus to find the solution, we can write: +\begin{align*} + ||\textbf{x}(\lambda)|| = \Delta +\end{align*} +\begin{align*} + ||\textbf{x}(\lambda)|| - \Delta = 0 +\end{align*} + +Taking the square of this equation +\begin{align*} + (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +\end{align*} +we have a function with one minimum for the optimal $\lambda$. +Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +\begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +\end{align*} + +But in practice, it is more effective to solve: +\begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +\end{align*} + +To do that, we just use the Newton method with "trust_newton" using +first and second derivative of $(||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\textbf{x}$. +This will give the optimal $\lambda$ to compute the +solution $\textbf{x}$ with the formula seen previously: +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} + +The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Evolution of the trust region + +We initialize the trust region at the first iteration using a radius +\begin{align*} + \Delta = ||\textbf{x}(\lambda=0)|| +\end{align*} + +And for the next iteration the trust region will evolves depending of +the agreement of the energy prediction based on the Taylor series +truncated at the 2nd order and the real energy. If the Taylor series +truncated at the 2nd order represents correctly the energy landscape +the trust region will be extent else it will be reduced. In order to +mesure this agreement we use the ratio rho cf. "rho_model" and +"trust_e_model". From that we use the following values: +- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +In addition, if $\rho < 0.1$ the iteration is cancelled, so it +restarts with a smaller trust region until the energy decreases. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Summary + +To summarize, knowing the hessian (eigenvectors and eigenvalues), the +gradient and the radius of the trust region we can compute the norm of +the Newton step +\begin{align*} + ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n + \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +\end{align*} + +- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and + $\textbf{x}(\lambda=0)$ is in the trust region and it is not + necessary to put a constraint on $\textbf{x}$, the solution is the + unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in + the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = + -h_1)$, similarly to the previous case. + But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we + have to search $\lambda \in (-h_1, \infty)$ such as + $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method + \begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 + \end{align*} + or + \begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 + \end{align*} + which is numerically more stable. And finally compute + \begin{align*} + \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i + \end{align*} +- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we + do exactly the same thing that the previous case but we search + $\lambda \in (0, \infty)$ +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the + sum), again we do exactly the same thing that the previous case + searching $\lambda \in (-h_1, \infty)$. + + +For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +necessary in fact to remove the $j = 1$ in the sum since the term +where $h_i - \lambda < 10^{-6}$ are not computed. + +After that, we take this vector $\textbf{x}^*$, called "x", and we do +the transformation to an antisymmetric matrix $\textbf{X}$, called +m_x. This matrix $\textbf{X}$ will be used to compute a rotation +matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +NB: +An improvement can be done using a elleptical trust region. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Code + +Provided: +| mo_num | integer | number of MOs | + +Cf. qp_edit in orbital optimization section, for some constants/thresholds + +Input: +| m | integer | number of MOs | +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| H(n,n2) | double precision | hessian | +| v_grad(n) | double precision | gradient | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n, n) | double precision | eigenvectors of the hessian | +| rho | double precision | agreement between the model and the reality, | +| | | represents the quality of the energy prediction | +| nb_iter | integer | number of iteration | + +Input/Ouput: +| delta | double precision | radius of the trust region | + +Output: +| x(n) | double precision | vector containing the step | + +Internal: +| accu | double precision | temporary variable to compute the step | +| lambda | double precision | lagrange multiplier | +| trust_radius2 | double precision | square of the radius of the trust region | +| norm2_x | double precision | norm^2 of the vector x | +| norm2_g | double precision | norm^2 of the vector containing the gradient | +| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +| i, j, k | integer | indexes | + +Function: +| dnrm2 | double precision | Blas function computing the norm | +| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n2) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) +#+END_SRC + + +*** Initialization and norm + +The norm of the step size will be useful for the trust region +algorithm. We start from a first guess and the radius of the trust +region will evolve during the optimization. + +avoid_saddle is actually a test to avoid saddle points + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialization of the Lagrange multiplier + lambda = 0d0 + + ! List of w^T.g, to avoid the recomputation + tmp_wtg = 0d0 + if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo + else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo + endif + + ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue + ! in the case of avoid_saddle + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + + endif + + ! Norm^2 of x, ||x||^2 + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta + ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm + ! Anyway if the step is too big it will be reduced + !print*,'||x||^2 :', norm2_x + + ! Norm^2 of the gradient, ||v_grad||^2 + norm2_g = (dnrm2(n,v_grad,1))**2 + !print*,'||grad||^2 :', norm2_g +#+END_SRC + +*** Trust radius initialization + + At the first iteration (nb_iter = 0) we initialize the trust region + with the norm of the step generate by the Newton's method ($\textbf{x}_1 = + (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, + we compute this norm using f_norm_trust_region_omp as explain just + below) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! trust radius + if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) + endif +#+END_SRC + +*** Modification of the trust radius + +In function of rho (which represents the agreement between the model +and the reality, cf. rho_model) the trust region evolves. We update +delta (the radius of the trust region). + +To avoid too big trust region we put a maximum size. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Modification of the trust radius in function of rho + if (rho >= 0.75d0) then + delta = 2d0 * delta + elseif (rho >= 0.5d0) then + delta = delta + elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta + else + delta = 0.25d0 * delta + endif + + ! Maximum size of the trust region + !if (delta > 0.5d0 * n * pi) then + ! delta = 0.5d0 * n * pi + ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' + !endif + + if (delta > 1d10) then + delta = 1d10 + endif + + !print*, 'Delta :', delta +#+END_SRC + +*** Calculation of the optimal lambda + +We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant + $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the + unconstrained one, $\lambda = 0$ + +You will find more details at the beginning + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 + ! and not (||x||^2 - delta)^2 = 0 + + ! Research of lambda to solve ||x(lambda)|| = Delta + + ! Display + !print*, 'e_val(1) = ', e_val(1) + !print*, 'w_1^T.g =', tmp_wtg(1) + + ! H positive definite + if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + ! H indefinite + else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + endif + + ! Recomputation of the norm^2 of the step x + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + print*,'' + print*,'Summary after the trust region:' + print*,'lambda:', lambda + print*,'||x||:', dsqrt(norm2_x) + print*,'delta:', delta +#+END_SRC + +*** Calculation of the step x + +x refers to $\textbf{x}^*$ +We compute x in function of lambda using its formula : +\begin{align*} +\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i ++ \lambda} \cdot \textbf{w}_i +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialisation + x = 0d0 + + ! Calculation of the step x + + if (n == n2) then + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif + else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif + endif + + double precision :: beta, norm_x + + ! Test + ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) + ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first + ! eigenvectors multiply by a constant to ensure the condition + ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif + endif +#+END_SRC + +*** Transformation of x + +x is a vector of size n, so it can be write as a m by m +antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| i | integer | index in the vector | + +Ouput: +| p,q | integer | corresponding indexes in the lower diagonal of a matrix | +| | | p > q, | +| | | p -> row, | +| | | q -> column | + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + !END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/vec_to_mat_v2.org b/src/utils_trust_region/org/vec_to_mat_v2.org new file mode 100644 index 00000000..4ce5f5e1 --- /dev/null +++ b/src/utils_trust_region/org/vec_to_mat_v2.org @@ -0,0 +1,40 @@ +* Vect to antisymmetric matrix using mat_to_vec_index + +Vector to antisymmetric matrix transformation using mat_to_vec_index +subroutine. + +Can be done in OMP (for the first part and with omp critical for the second) + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + !BEGIN_DOC + ! Vector to antisymmetric matrix + !END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h new file mode 100644 index 00000000..2c36a9f0 --- /dev/null +++ b/src/utils_trust_region/pi.h @@ -0,0 +1,2 @@ + logical, parameter :: debug=.False. + double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f new file mode 100644 index 00000000..827af8c0 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -0,0 +1,441 @@ +! Rotation matrix + +! *Build a rotation matrix from an antisymmetric matrix* + +! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +! $$ +! \textbf{R}=\exp(\textbf{A}) +! $$ + +! So : +! \begin{align*} +! \textbf{R}=& \exp(\textbf{A}) \\ +! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +! \end{align*} + +! With : +! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +! $\tau$ : $\sqrt{-x}$ +! $x$ : eigenvalues of $\textbf{A}^2$ + +! Input: +! | A(n,n) | double precision | antisymmetric matrix | +! | n | integer | number of columns of the A matrix | +! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +! Output: +! | R(n,n) | double precision | Rotation matrix | +! | info | integer | if info = 0, the execution is successful | +! | | | if info = k, the k-th parameter has an illegal value | +! | | | if info = -k, the algorithm failed | + +! Internal: +! | B(n,n) | double precision | B = A.A | +! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +! | W(n,n) | double precision | eigenvectors of B | +! | e_val(n) | double precision | eigenvalues of B | +! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | +! | part_1a(n,n) | double precision | matrix cos_tau.W^t | +! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +! | part_2a(n,n) | double precision | matrix W^t.A | +! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +! | norm | integer | norm of R.R^t-1, must be equal to 0 | +! | i,j | integer | indexes | + +! Functions: +! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | +! | disnan | logical | Lapack function, check if an element is NaN | + + + +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) + +! Pre-conditions + +! Initialization +info=0 +enforce_step_cancellation = .False. + +! Size of matrix A must be at least 1 by 1 +if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return +endif + +! Leading dimension of A must be >= n +if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return +endif + +! Leading dimension of A must be >= n +if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return +endif + +! Matrix elements of A must by non-NaN +do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo +enddo + +do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif +enddo + +do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo +enddo + +! Fix for too big elements ! bad idea better to cancel if the error is too big +!do j = 1, n +! do i = 1, n +! A(i,j) = mod(A(i,j),2d0*pi) +! if (dabs(A(i,j)) > pi) then +! A(i,j) = 0d0 +! endif +! enddo +!enddo + +max_elem_A = 0d0 +do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo +enddo +!print*,'max element in A', max_elem_A + +if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' +endif + +! B=A.A +! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ +! - Diagonalization of $\textbf{B}$ +! W, the eigenvectors +! e_val, the eigenvalues + + +! Compute B=A.A + +call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + +! Copy B in W, diagonalization will put the eigenvectors in W +W=B + +! Diagonalization of B +! Eigenvalues -> e_val +! Eigenvectors -> W +lwork = 3*n-1 +allocate(work(lwork,n)) + +!print*,'Starting diagonalization ...' + +call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + +deallocate(work) + +if (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' +elseif (info2 >0) then + print*, "WARNING: Diagonalization failed to converge" +endif + +! Tau^-1, cos(tau), sin(tau) +! $$\tau = \sqrt{-x}$$ +! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ +! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ +! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ +! These matrices are diagonals + +! Diagonal matrix m_diag +do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif +enddo + +m_diag = 0.d0 +do i = 1, n + m_diag(i,i) = e_val(i) +enddo + +! cos_tau +do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo +enddo + +! sin_tau +do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo +enddo + +! Debug, display the cos_tau and sin_tau matrix +!if (debug) then +! print*, 'cos_tau' +! do i = 1, n +! print*, cos_tau(i,:) +! enddo +! print*, 'sin_tau' +! do i = 1, n +! print*, sin_tau(i,:) +! enddo +!endif + +! tau^-1 +do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo +enddo + +max_elem = 0d0 +do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif +enddo +!print*,'max elem tau^-1:', max_elem + +! Debug +!print*,'eigenvalues:' +!do i = 1, n +! print*, e_val(i) +!enddo + +!Debug, display tau^-1 +!if (debug) then +! print*, 'tau^-1' +! do i = 1, n +! print*,tau_m1(i,:) +! enddo +!endif + +! Rotation matrix +! \begin{align*} +! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} +! \begin{align*} +! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} +! \end{align*} +! \begin{align*} +! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} + +! First: +! part_1 = dgemm(W, dgemm(cos_tau, W^t)) +! part_1a = dgemm(cos_tau, W^t) +! part_1 = dgemm(W, part_1a) +! And: +! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) +! part_2a = dgemm(W^t, A) +! part_2b = dgemm(sin_tau, part_2a) +! part_2c = dgemm(tau_m1, part_2b) +! part_2 = dgemm(W, part_2c) +! Finally: +! Rotation matrix, R = part_1+part_2 + +! If $R$ is a rotation matrix: +! $R.R^T=R^T.R=\textbf{1}$ + +! part_1 +call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + +! part_2 +call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) +call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) +call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + +! Rotation matrix R +R = part_1 + part_2 + +! Matrix check +! R.R^t and R^t.R must be equal to identity matrix +do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo +enddo + +call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + +norm = dnrm2(n*n,RR_t,1) +!print*, 'Rotation matrix check, norm R.R^T = ', norm + +! Debug +!if (debug) then +! print*, 'RR_t' +! do i = 1, n +! print*, RR_t(i,:) +! enddo +!endif + +! Post conditions + +! Check if R.R^T=1 +max_elem = 0d0 +do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo +enddo + +print*, 'Max error in R.R^T:', max_elem +!print*, 'e_val(1):', e_val(1) +!print*, 'e_val(n):', e_val(n) +!print*, 'max elem in A:', max_elem_A + +if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. +endif + +! Matrix elements of R must by non-NaN +do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo +enddo + +! Display +!if (debug) then +! print*,'Rotation matrix :' +! do i = 1, n +! write(*,'(100(F10.5))') R(i,:) +! enddo +!endif + +! Deallocation, end + +deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f new file mode 100644 index 00000000..f268df04 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -0,0 +1,134 @@ +! Rotation matrix with the iterative method + +! \begin{align*} +! \textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +! \end{align*} + +! !!! Doesn't work !!! + + +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end + +! Factorial + +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f new file mode 100644 index 00000000..75d04352 --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -0,0 +1,64 @@ +! Rotation matrix in a subspace to rotation matrix in the full space + +! Usually, we are using a list of MOs, for exemple the active ones. When +! we compute a rotation matrix to rotate the MOs, we just compute a +! rotation matrix for these MOs in order to reduce the size of the +! matrix which has to be computed. Since the computation of a rotation +! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +! reuce the number of MOs involved. +! After that we replace the rotation matrix in the full space by +! building the elements of the rotation matrix in the full space from +! the elements of the rotation matrix in the subspace and adding some 0 +! on the extradiagonal elements and some 1 on the diagonal elements, +! for the MOs that are not involved in the rotation. + +! Provided: +! | mo_num | integer | Number of MOs | + +! Input: +! | m | integer | Size of tmp_list, m <= mo_num | +! | tmp_list(m) | integer | List of MOs | +! | tmp_R(m,m) | double precision | Rotation matrix in the space of | +! | | | the MOs containing by tmp_list | + +! Output: +! | R(mo_num,mo_num | double precision | Rotation matrix in the space | +! | | | of all the MOs | + +! Internal: +! | i,j | integer | indexes in the full space | +! | tmp_i,tmp_j | integer | indexes in the subspace | + + +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f new file mode 100644 index 00000000..ad5ad2f9 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -0,0 +1,126 @@ +! Predicted energy : e_model + +! *Compute the energy predicted by the Taylor series* + +! The energy is predicted using a Taylor expansion truncated at te 2nd +! order : + +! \begin{align*} +! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +! \end{align*} + +! Input: +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | + +! Output: +! | e_model | double precision | predicted energy after the rotation of the MOs | + +! Internal: +! | part_1 | double precision | v_grad^T.x | +! | part_2 | double precision | 1/2 . x^T.H.x | +! | part_2a | double precision | H.x | +! | i,j | integer | indexes | + +! Function: +! | ddot | double precision | dot product (Lapack) | + + +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) + +! Calculations + +! part_1 corresponds to the product g.x +! part_2a corresponds to the product H.x +! part_2 corresponds to the product 0.5*(x^T.H.x) + +! TODO: remove the dot products + + +! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + ! print*,'g.x : ', part_1 + !endif + + ! Product H.x + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + ! print*,'1/2*x^T.H.x : ', part_2 + !endif + + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + +end subroutine diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f new file mode 100644 index 00000000..b7dcf875 --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -0,0 +1,1651 @@ +! Newton's method to find the optimal lambda + +! *Compute the lambda value for the trust region* + +! This subroutine uses the Newton method in order to find the optimal +! lambda. This constant is added on the diagonal of the hessian to shift +! the eiganvalues. It has a double role: +! - ensure that the resulting hessian is positive definite for the +! Newton method +! - constrain the step in the trust region, i.e., +! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius +! of the trust region. +! We search $\lambda$ which minimizes +! \begin{align*} +! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +! \end{align*} +! or +! \begin{align*} +! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +! \end{align*} +! and gives obviously 0 in both cases. \newline + +! There are several cases: +! - If $\textbf{H}$ is positive definite the interval containing the +! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$ then the interval containing +! the solution is $\lambda \in (-h_1, \infty)$. +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ then the interval containing the solution is +! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < +! 10^{-12}$ are not computed, so the term where $i = 1$ is +! automatically removed and this case becomes similar to the previous one. + +! So to avoid numerical problems (cf. trust_region) we start the +! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +! with $\epsilon$ a little constant. +! The research must be restricted to the interval containing the +! solution. For that reason a little trust region in 1D is used. + +! The Newton method to find the optimal $\lambda$ is : +! \begin{align*} +! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +! \end{align*} +! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +! $\lambda$ at the l-th iteration, +! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +! $\lambda$ at the l-th iteration.\newline + +! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +! \begin{align*} +! y \leq \alpha +! \end{align*} +! with $\alpha$ a scalar representing the trust length (trust region in +! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +! Taylor series truncated at the second order. Thus, if $y > \alpha$, +! the constraint is applied as +! \begin{align*} +! y^* = \alpha \frac{y}{|y|} +! \end{align*} +! with $y^*$ the solution in the trust region. + +! The size of the trust region evolves in function of $\rho$ as for the +! trust region seen previously cf. trust_region, rho_model. +! The prediction of the value of $f$ or $\tilde{f}$ is done using the +! Taylor series truncated at the second order cf. "trust_region", +! "trust_e_model". + +! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +! 1/\Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} +! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! Provided in qp_edit: +! | thresh_rho_2 | +! | thresh_cc | +! | nb_it_max_lambda | +! | version_lambda_search | +! | nb_it_max_pre_search | +! see qp_edit for more details + +! Input: +! | n | integer | m*(m-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +! | delta | double precision | delta for the trust region | + +! Output: +! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +! | | | lambda > 0 | + +! Internal: +! | d1_N | double precision | value of d1_norm_trust_region | +! | d2_N | double precision | value of d2_norm_trust_region | +! | f_N | double precision | value of f_norm_trust_region | +! | prev_f_N | double precision | previous value of f_norm_trust_region | +! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +! | prev_f_R | double precision | previous value of f_R | +! | model | double precision | predicted value of f_R from prev_f_R and y | +! | d_1 | double precision | value of the first derivative | +! | d_2 | double precision | value of the second derivative | +! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +! | prev_lambda | double precision | previous value of lambda | +! | t1,t2,t3 | double precision | wall time | +! | i | integer | index | +! | epsilon | double precision | little constant to avoid numerical problem | +! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +! | version | integer | version of the root finding method | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | f_norm_trust_region | double precision | value of norm(x)^2 | + + + +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif + ! Version 2 is normally better + + + +! Resolution with the Newton method: + + +! Initialization + epsilon = 1d-4 + lambda = max(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + !print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + !print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + !print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + !write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif + + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + !print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + !print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'---End trust_newton---' + +end subroutine + +! OMP: First derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | mo_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function + +! OMP: Second derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +! This function computes the second derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. +! \begin{align*} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_accu2 | double precision | temporary array for the third sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function + +! OMP: Function value of ||x||^2 + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | + +! Internal: +! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +! | tmp_fN | double precision | temporary array for the function | +! | i,j | integer | indexes | + + +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function + +! First derivative of (||x||^2 - Delta^2)^2 +! Version without omp + +! *Function to compute the first derivative of ||x||^2 - Delta* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | ddot | double precision | blas dot product | + + +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function + +! Second derivative of (||x||^2 - Delta^2)^2 +! Version without OMP + +! *Function to compute the second derivative of ||x||^2 - Delta* + + +! \begin{equation} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{equation} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +! | ddot | double precision | blas dot product | + + +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function + +! Function value of ||x||^2 +! Version without OMP + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | f_norm_trust_region | double precision | value of norm(x)^2 | +! | ddot | double precision | blas dot product | + + + +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function + +! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end + +! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end + +! First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end + +! Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f new file mode 100644 index 00000000..11ab11e9 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -0,0 +1,120 @@ +! Agreement with the model: Rho + +! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +! Rho represents the agreement between the model (the predicted energy +! by the Taylor expansion truncated at the 2nd order) and the real +! energy : + +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} +! With : +! $E^{k}$ the energy at the previous iteration +! $E^{k+1}$ the energy at the actual iteration +! $m^{k+1}$ the predicted energy for the actual iteration +! (cf. trust_e_model) + +! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +! If $\rho \leq 0$ the previous energy is lower than the actual +! energy. We have to cancel the last step and use a smaller trust +! region. +! Here we cancel the last step if $\rho < 0.1$, because even if +! the energy decreases, the agreement is bad, i.e., the Taylor expansion +! truncated at the second order doesn't represent correctly the energy +! landscape. So it's better to cancel the step and restart with a +! smaller trust region. + +! Provided in qp_edit: +! | thresh_rho | + +! Input: +! | prev_energy | double precision | previous energy (energy before the rotation) | +! | e_model | double precision | predicted energy after the rotation | + +! Output: +! | rho | double precision | the agreement between the model (predicted) and the real energy | +! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +! | | | else the previous energy doesn't change | + +! Internal: +! | energy | double precision | energy (real) after the rotation | +! | i | integer | index | +! | t* | double precision | time | + + +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + !call wall_time(t1) + +! Rho +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} + +! In function of $\rho$ th step can be accepted or cancelled. + +! If we cancel the last step (k+1), the previous energy (k) doesn't +! change! +! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + + +! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + !print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy + endif + + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + +end subroutine diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f new file mode 100644 index 00000000..54161a1c --- /dev/null +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -0,0 +1,749 @@ +! Trust region + +! *Compute the next step with the trust region algorithm* + +! The Newton method is an iterative method to find a minimum of a given +! function. It uses a Taylor series truncated at the second order of the +! targeted function and gives its minimizer. The minimizer is taken as +! the new position and the same thing is done. And by doing so +! iteratively the method find a minimum, a local or global one depending +! of the starting point and the convexity/nonconvexity of the targeted +! function. + +! The goal of the trust region is to constrain the step size of the +! Newton method in a certain area around the actual position, where the +! Taylor series is a good approximation of the targeted function. This +! area is called the "trust region". + +! In addition, in function of the agreement between the Taylor +! development of the energy and the real energy, the size of the trust +! region will be updated at each iteration. By doing so, the step sizes +! are not too larges. In addition, since we add a criterion to cancel the +! step if the energy increases (more precisely if rho < 0.1), so it's +! impossible to diverge. \newline + +! References: \newline +! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +! ISBN: 978-0-387-40065-5 \newline + +! By using the first and the second derivatives, the Newton method gives +! a step: +! \begin{align*} +! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot +! \textbf{g}_{(k)} +! \end{align*} +! which leads to the minimizer of the Taylor series. +! !!! Warning: the Newton method gives the minimizer if and only if +! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! +! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +! \begin{align*} +! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +! \end{align*} +! which is equivalent to +! \begin{align*} +! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +! \end{align*} + +! with: \newline +! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +! size n) \newline +! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +! matrix) \newline +! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +! size n) \newline +! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +! \newline + +! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +! hypersphere of radius $\Delta_{(k+1)}$.\newline + +! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +! $\textbf{H}$ is positive definite, the +! solution is the step given by the Newton method +! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +! Else we have to constrain the step size. For simplicity we will remove +! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +! Starting from the Taylor series of a function E (here, the energy) +! truncated at the 2nd order, we have: +! \begin{align*} +! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} +! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + +! \mathcal{O}(\textbf{x}^2) +! \end{align*} + +! With the constraint on the norm of $\textbf{x}$ we can write the +! Lagrangian +! \begin{align*} +! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} +! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +! \end{align*} +! Where: \newline +! $\lambda$ is the Lagrange multiplier \newline +! $E$ is the energy at the k-th iteration $\Leftrightarrow +! E(\textbf{x} = \textbf{0})$ \newline + +! To solve this equation, we search a stationary point where the first +! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +! \end{align*} + +! The derivative is: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! \end{align*} + +! So, we search $\textbf{x}$ such as: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +! \end{align*} + +! We can rewrite that as: +! \begin{align*} +! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +! \end{align*} +! with $\textbf{I}$ is the identity matrix. + +! By doing so, the solution is: +! \begin{align*} +! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +! \end{align*} +! \begin{align*} +! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} +! with $\textbf{x}^T \textbf{x} = \Delta^2$. + +! We have to solve this previous equation to find this $\textbf{x}$ in the +! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +! just a one dimension problem because we can express $\textbf{x}$ as a +! function of $\lambda$: +! \begin{align*} +! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} + +! We start from the fact that the hessian is diagonalizable. So we have: +! \begin{align*} +! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +! \end{align*} +! with: \newline +! $\textbf{H}$, the hessian matrix \newline +! $\textbf{W}$, the matrix containing the eigenvectors \newline +! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +! $h_i$, the i-th eigenvalue in ascending order \newline + +! Now we use the fact that adding a constant on the diagonal just shifts +! the eigenvalues: +! \begin{align*} +! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} +! +\textbf{I} \lambda) \cdot \textbf{W}^T +! \end{align*} + +! By doing so we can express $\textbf{x}$ as a function of $\lambda$ +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! with $\lambda \neq - h_i$. + +! An interesting thing in our case is the norm of $\textbf{x}$, +! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot +! \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +! And if we study the properties of this function we see that: +! \begin{align*} +! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +! \end{align*} +! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +! \begin{align*} +! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +! \end{align*} + +! From these limits and knowing that $h_1$ is the lowest eigenvalue, we +! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +! strictly decreasing function on the interval $\lambda \in +! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +! solution. + +! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +! Newton method is only defined for a positive definite hessian matrix, +! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +! definite. Consequently, in the case where $\textbf{H}$ is not positive +! definite, to ensure the positive definiteness, $\lambda$ must be +! greater than $- h_1$. +! \begin{align*} +! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +! \end{align*} + +! From that there are five cases: +! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} +! \lambda)$ +! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is +! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = +! 0)|| \leq \Delta$ +! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point + +! Thus to find the solution, we can write: +! \begin{align*} +! ||\textbf{x}(\lambda)|| = \Delta +! \end{align*} +! \begin{align*} +! ||\textbf{x}(\lambda)|| - \Delta = 0 +! \end{align*} + +! Taking the square of this equation +! \begin{align*} +! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +! \end{align*} +! we have a function with one minimum for the optimal $\lambda$. +! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} + +! But in practice, it is more effective to solve: +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} + +! To do that, we just use the Newton method with "trust_newton" using +! first and second derivative of $(||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\textbf{x}$. +! This will give the optimal $\lambda$ to compute the +! solution $\textbf{x}$ with the formula seen previously: +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} + +! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + + + + +! Evolution of the trust region + +! We initialize the trust region at the first iteration using a radius +! \begin{align*} +! \Delta = ||\textbf{x}(\lambda=0)|| +! \end{align*} + +! And for the next iteration the trust region will evolves depending of +! the agreement of the energy prediction based on the Taylor series +! truncated at the 2nd order and the real energy. If the Taylor series +! truncated at the 2nd order represents correctly the energy landscape +! the trust region will be extent else it will be reduced. In order to +! mesure this agreement we use the ratio rho cf. "rho_model" and +! "trust_e_model". From that we use the following values: +! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +! In addition, if $\rho < 0.1$ the iteration is cancelled, so it +! restarts with a smaller trust region until the energy decreases. + + + + +! Summary + +! To summarize, knowing the hessian (eigenvectors and eigenvalues), the +! gradient and the radius of the trust region we can compute the norm of +! the Newton step +! \begin{align*} +! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n +! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +! \end{align*} + +! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and +! $\textbf{x}(\lambda=0)$ is in the trust region and it is not +! necessary to put a constraint on $\textbf{x}$, the solution is the +! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in +! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = +! -h_1)$, similarly to the previous case. +! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we +! have to search $\lambda \in (-h_1, \infty)$ such as +! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} +! or +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} +! which is numerically more stable. And finally compute +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we +! do exactly the same thing that the previous case but we search +! $\lambda \in (0, \infty)$ +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the +! sum), again we do exactly the same thing that the previous case +! searching $\lambda \in (-h_1, \infty)$. + + +! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +! necessary in fact to remove the $j = 1$ in the sum since the term +! where $h_i - \lambda < 10^{-6}$ are not computed. + +! After that, we take this vector $\textbf{x}^*$, called "x", and we do +! the transformation to an antisymmetric matrix $\textbf{X}$, called +! m_x. This matrix $\textbf{X}$ will be used to compute a rotation +! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +! NB: +! An improvement can be done using a elleptical trust region. + + + + +! Code + +! Provided: +! | mo_num | integer | number of MOs | + +! Cf. qp_edit in orbital optimization section, for some constants/thresholds + +! Input: +! | m | integer | number of MOs | +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | H(n,n2) | double precision | hessian | +! | v_grad(n) | double precision | gradient | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n, n) | double precision | eigenvectors of the hessian | +! | rho | double precision | agreement between the model and the reality, | +! | | | represents the quality of the energy prediction | +! | nb_iter | integer | number of iteration | + +! Input/Ouput: +! | delta | double precision | radius of the trust region | + +! Output: +! | x(n) | double precision | vector containing the step | + +! Internal: +! | accu | double precision | temporary variable to compute the step | +! | lambda | double precision | lagrange multiplier | +! | trust_radius2 | double precision | square of the radius of the trust region | +! | norm2_x | double precision | norm^2 of the vector x | +! | norm2_g | double precision | norm^2 of the vector containing the gradient | +! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +! | i, j, k | integer | indexes | + +! Function: +! | dnrm2 | double precision | Blas function computing the norm | +! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + + +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n2) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) + +! Initialization and norm + +! The norm of the step size will be useful for the trust region +! algorithm. We start from a first guess and the radius of the trust +! region will evolve during the optimization. + +! avoid_saddle is actually a test to avoid saddle points + + +! Initialization of the Lagrange multiplier +lambda = 0d0 + +! List of w^T.g, to avoid the recomputation +tmp_wtg = 0d0 +if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo +else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo +endif + +! Replacement of the small tmp_wtg corresponding to a negative eigenvalue +! in the case of avoid_saddle +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + +endif + +! Norm^2 of x, ||x||^2 +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) +! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta +! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm +! Anyway if the step is too big it will be reduced +!print*,'||x||^2 :', norm2_x + +! Norm^2 of the gradient, ||v_grad||^2 +norm2_g = (dnrm2(n,v_grad,1))**2 +!print*,'||grad||^2 :', norm2_g + +! Trust radius initialization + +! At the first iteration (nb_iter = 0) we initialize the trust region +! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = +! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, +! we compute this norm using f_norm_trust_region_omp as explain just +! below) + + +! trust radius +if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) +endif + +! Modification of the trust radius + +! In function of rho (which represents the agreement between the model +! and the reality, cf. rho_model) the trust region evolves. We update +! delta (the radius of the trust region). + +! To avoid too big trust region we put a maximum size. + + +! Modification of the trust radius in function of rho +if (rho >= 0.75d0) then + delta = 2d0 * delta +elseif (rho >= 0.5d0) then + delta = delta +elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta +else + delta = 0.25d0 * delta +endif + +! Maximum size of the trust region +!if (delta > 0.5d0 * n * pi) then +! delta = 0.5d0 * n * pi +! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' +!endif + +if (delta > 1d10) then + delta = 1d10 +endif + +!print*, 'Delta :', delta + +! Calculation of the optimal lambda + +! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant +! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the +! unconstrained one, $\lambda = 0$ + +! You will find more details at the beginning + + +! By giving delta, we search (||x||^2 - delta^2)^2 = 0 +! and not (||x||^2 - delta)^2 = 0 + +! Research of lambda to solve ||x(lambda)|| = Delta + +! Display +!print*, 'e_val(1) = ', e_val(1) +!print*, 'w_1^T.g =', tmp_wtg(1) + +! H positive definite +if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +! H indefinite +else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +endif + +! Recomputation of the norm^2 of the step x +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) +print*,'' +print*,'Summary after the trust region:' +print*,'lambda:', lambda +print*,'||x||:', dsqrt(norm2_x) +print*,'delta:', delta + +! Calculation of the step x + +! x refers to $\textbf{x}^*$ +! We compute x in function of lambda using its formula : +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i +! + \lambda} \cdot \textbf{w}_i +! \end{align*} + + +! Initialisation +x = 0d0 + +! Calculation of the step x + +if (n == n2) then + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif +else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif +endif + +double precision :: beta, norm_x + +! Test +! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) +! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first +! eigenvectors multiply by a constant to ensure the condition +! ||x(lambda=-e_val(1))|| = delta and escape the saddle point +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif +endif + +! Transformation of x + +! x is a vector of size n, so it can be write as a m by m +! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | i | integer | index in the vector | + +! Ouput: +! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | +! | | | p > q, | +! | | | p -> row, | +! | | | q -> column | + + +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + !END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f new file mode 100644 index 00000000..e184d3ba --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -0,0 +1,39 @@ +! Vect to antisymmetric matrix using mat_to_vec_index + +! Vector to antisymmetric matrix transformation using mat_to_vec_index +! subroutine. + +! Can be done in OMP (for the first part and with omp critical for the second) + + +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + !BEGIN_DOC + ! Vector to antisymmetric matrix + !END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end From d6f7ec60f8e140982a24b282a36d027397d68d93 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:22:46 +0200 Subject: [PATCH 06/29] add mo localization --- src/mo_localization/84.mo_localization.bats | 97 + src/mo_localization/EZFIO.cfg | 54 + src/mo_localization/NEED | 3 + src/mo_localization/README.md | 113 + src/mo_localization/break_spatial_sym.irp.f | 27 + src/mo_localization/debug_gradient_loc.irp.f | 65 + src/mo_localization/debug_hessian_loc.irp.f | 65 + src/mo_localization/kick_the_mos.irp.f | 16 + src/mo_localization/localization.irp.f | 520 +++ src/mo_localization/localization_sub.irp.f | 2008 ++++++++++++ src/mo_localization/org/TANGLE_org_mode.sh | 7 + src/mo_localization/org/break_spatial_sym.org | 28 + .../org/debug_gradient_loc.org | 67 + src/mo_localization/org/debug_hessian_loc.org | 67 + src/mo_localization/org/kick_the_mos.org | 18 + src/mo_localization/org/localization.org | 2899 +++++++++++++++++ 16 files changed, 6054 insertions(+) create mode 100644 src/mo_localization/84.mo_localization.bats create mode 100644 src/mo_localization/EZFIO.cfg create mode 100644 src/mo_localization/NEED create mode 100644 src/mo_localization/README.md create mode 100644 src/mo_localization/break_spatial_sym.irp.f create mode 100644 src/mo_localization/debug_gradient_loc.irp.f create mode 100644 src/mo_localization/debug_hessian_loc.irp.f create mode 100644 src/mo_localization/kick_the_mos.irp.f create mode 100644 src/mo_localization/localization.irp.f create mode 100644 src/mo_localization/localization_sub.irp.f create mode 100755 src/mo_localization/org/TANGLE_org_mode.sh create mode 100644 src/mo_localization/org/break_spatial_sym.org create mode 100644 src/mo_localization/org/debug_gradient_loc.org create mode 100644 src/mo_localization/org/debug_hessian_loc.org create mode 100644 src/mo_localization/org/kick_the_mos.org create mode 100644 src/mo_localization/org/localization.org diff --git a/src/mo_localization/84.mo_localization.bats b/src/mo_localization/84.mo_localization.bats new file mode 100644 index 00000000..b34c0bd5 --- /dev/null +++ b/src/mo_localization/84.mo_localization.bats @@ -0,0 +1,97 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + +zero () { + if [ -z "$1" ]; then echo 0.0; else echo $1; fi +} + +function run() { + thresh1=1e-10 + thresh2=1e-12 + thresh3=1e-4 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -d + qp set_frozen_core + qp set localization localization_method boys + file="$(echo $1 | sed 's/.ezfio//g')" + energy="$(cat $1/hartree_fock/energy)" + fb_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + fb_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + fb_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + fb_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + fb_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + fb_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + fb_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + qp reset -a + qp run scf + qp set_frozen_core + qp set localization localization_method pipek + pm_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + pm_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + pm_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + pm_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + pm_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + pm_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + pm_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + qp set localization localization_method boys + qp reset -a + qp run scf + qp set_frozen_core + eq $energy $fb_energy $thresh1 + eq $fb_err1 0.0 $thresh2 + eq $fb_err2 0.0 $thresh2 + eq $energy $pm_energy $thresh1 + eq $pm_err1 0.0 $thresh2 + eq $pm_err2 0.0 $thresh2 + fb_c=$(zero $fb_c) + fb_i=$(zero $fb_i) + fb_a=$(zero $fb_a) + fb_v=$(zero $fb_v) + pm_c=$(zero $pm_c) + pm_i=$(zero $pm_i) + pm_a=$(zero $pm_a) + pm_v=$(zero $pm_v) + eq $fb_c $2 $thresh3 + eq $fb_i $3 $thresh3 + eq $fb_a $4 $thresh3 + eq $fb_v $5 $thresh3 + eq $pm_c $6 $thresh3 + eq $pm_i $7 $thresh3 + eq $pm_a $8 $thresh3 + eq $pm_v $9 $thresh3 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -32.1357551678876 -47.0041982094667 0.0 -223.470015856259 -1.99990778964451 -2.51376723927071 0.0 -12.8490602539275 +} + +@test "clo" { +run clo.ezfio -44.1624001765291 -32.4386660941387 0.0 -103.666309287187 -5.99985418946811 -5.46871580225222 0.0 -20.2480064922275 +} + +@test "clf" { +run clf.ezfio -47.5143398826967 -35.7206886315104 0.0 -107.043029033468 -5.99994222062230 -6.63916513458470 0.0 -19.7035159913484 +} + +@test "h2o2" { +run h2o2.ezfio -7.76848143170524 -30.9694344369829 0.0 -175.898343829453 -1.99990497554575 -5.62980322957485 0.0 -33.5699813186666 +} + +@test "h2o" { +run h2o.ezfio 0.0 -2.52317434969591 0.0 -45.3136377925359 0.0 -3.01248365356981 0.0 -22.4470831240924 +} + +@test "h3coh" { +run h3coh.ezfio -3.66763692804590 -24.0463089480870 0.0 -111.485948435075 -1.99714061342078 -4.89242181322988 0.0 -23.6405412057679 +} + +@test "n2h4" { +run n2h4.ezfio -7.46608163002070 -35.7632174051822 0.0 -305.913449004632 -1.99989326143356 -4.62496615892268 0.0 -51.5171904685553 +} + diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..d1b844a5 --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys: Foster-Boys, pipek: Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[auto_mo_class] +type: logical +doc: If true, set automatically the classes. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..b438f39d --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,3 @@ +hartree_fock +utils_trust_region +determinants diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..c28a5ee1 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,113 @@ +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +By default, the different otbital classes are automatically set by splitting +the orbitales in the following classes: +- Core -> Core +- Active, doubly occupied -> Inactive +- Active, singly occupied -> Active +- Active, empty -> Virtual +- Deleted -> Deleted +The orbitals will be localized among each class, excpect the deleted ones. +If you want to choose another splitting, you can set +``` +qp set mo_localization auto_mo_class false +``` +and define the classes with +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +for more information +``` +qp set_mo_class -q +``` +We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. +If you are using the last option don't forget to reset the initial mo classes +after the localization. + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a given rotation between the MOs. +This feature can be removed by setting: +``` +qp set localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +This program work exactly as the localization. +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + +# Parameters +Some other parameters are available for the localization (qp edit for more details). + +# Tests +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..2048aca6 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,27 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..d935e782 --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,65 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..3ee4f0fa --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,65 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..b6c77c9e --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,16 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..7ccb2f5a --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,520 @@ +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +! Loc + +! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..f5afed07 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2008 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end diff --git a/src/mo_localization/org/TANGLE_org_mode.sh b/src/mo_localization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/org/break_spatial_sym.org b/src/mo_localization/org/break_spatial_sym.org new file mode 100644 index 00000000..d82f1c60 --- /dev/null +++ b/src/mo_localization/org/break_spatial_sym.org @@ -0,0 +1,28 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_gradient_loc.org b/src/mo_localization/org/debug_gradient_loc.org new file mode 100644 index 00000000..6d147dd0 --- /dev/null +++ b/src/mo_localization/org/debug_gradient_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_hessian_loc.org b/src/mo_localization/org/debug_hessian_loc.org new file mode 100644 index 00000000..e47cf38d --- /dev/null +++ b/src/mo_localization/org/debug_hessian_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/org/kick_the_mos.org b/src/mo_localization/org/kick_the_mos.org new file mode 100644 index 00000000..c0c6c02d --- /dev/null +++ b/src/mo_localization/org/kick_the_mos.org @@ -0,0 +1,18 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/localization.org b/src/mo_localization/org/localization.org new file mode 100644 index 00000000..aaf9f18d --- /dev/null +++ b/src/mo_localization/org/localization.org @@ -0,0 +1,2899 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +lpqp set_mo_class -h + +*** Foster-Boys localization +Foster-Boys localization: +- cite Foster +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Noting +$$A_{ii} = < \phi_i | r^2 | \phi_i > $$ +$$B_{ii} = < \phi_i | r | \phi_i > $$ + +$$ \beta = (B_{pp} - B_{qq})^2 - 4 B_{pq}^2 $$ +$$ \gamma = 4 B_{pq} (B_{pp} - B_{qq}) $$ + +\begin{align*} +C_{FB}(\theta) &= \sum_{i=1}^N \left[ A_{ii} - B_{ii}^2 \right] \\ +&- \left[ A_{pp} - B_{pp}^2 + A_{qq} - B_{qq}^2 \right] \\ +&+ \left[ A_{pp} + A_{qq} - B_{pp}^2 - B_{qq}^2 ++ \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \right] \\ +&= C_1(\theta=0) + \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_{FB}(\theta)}{\partial \theta} = \beta \sin(4\theta) + \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2} = 4 \beta \cos(4\theta) - 4 \gamma \sin(4 \theta) +\end{align*} + +Similarly: +\begin{align*} +C_3(\theta) &= \sum_{i=1}^N [B_{ii}^2] \\ +&- B_{pp}^2 - B_{qq}^2 \\ +&+ B_{pp}^2 + B_{qq}^2 - \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \\ +&= C_3(\theta=0) - \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_3(\theta)}{\partial \theta} = - \beta \sin(4\theta) - \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_3(\theta)}{\partial \theta^2} = - 4 \beta \cos(4\theta) + 4 \gamma \sin(4 \theta) +\end{align*} + +And since we compute the derivatives around $\theta = 0$ (around the +actual position) we have: +\begin{align*} +\left. \frac{\partial{C_{FB}(\theta)}}{\partial \theta}\right|_{\theta=0} = \gamma +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta +\end{align*} + +Locality of the orbitals: +- cite Hoyvik +As the Foster-Boys method tries to minimize the sum of the second +moment MO spread, the locality of each MO can be expressed as the +second moment of the MO spread. For the MO i, the locality criterion is +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +-cite pipek mezey 1989 +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +The optimum functional $\mathcal{P}$ is obtained for the maximum of +$D^{-1}$ +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} + +As for the Foster Boys localization, the change in the functional for +the rotation of two MOs can be obtained using very similar terms +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} +The matrix element of the operator $P_A$ are obtained using +\begin{align*} +<\rho | \tilde{\mu}> = \delta_{\rho \mu} +\end{align*} +which leads to +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +So similarly the first and second derivatives are + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end +#+END_SRC From b71888f459d7407d7589f001a4d25418dde9df63 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:56:30 +0200 Subject: [PATCH 07/29] add mo optimization --- src/mo_optimization/83.mo_optimization.bats | 62 + src/mo_optimization/EZFIO.cfg | 29 + src/mo_optimization/NEED | 7 + src/mo_optimization/README.md | 74 + src/mo_optimization/class.irp.f | 12 + src/mo_optimization/constants.h | 1 + .../debug_gradient_list_opt.irp.f | 78 + src/mo_optimization/debug_gradient_opt.irp.f | 76 + .../debug_hessian_list_opt.irp.f | 147 ++ src/mo_optimization/debug_hessian_opt.irp.f | 171 ++ .../diagonal_hessian_list_opt.irp.f | 1556 ++++++++++++++++ .../diagonal_hessian_opt.irp.f | 1511 ++++++++++++++++ .../diagonalization_hessian.irp.f | 136 ++ .../first_diagonal_hessian_list_opt.irp.f | 372 ++++ .../first_diagonal_hessian_opt.irp.f | 344 ++++ .../first_gradient_list_opt.irp.f | 125 ++ src/mo_optimization/first_gradient_opt.irp.f | 128 ++ .../first_hessian_list_opt.irp.f | 365 ++++ src/mo_optimization/first_hessian_opt.irp.f | 360 ++++ src/mo_optimization/gradient_list_opt.irp.f | 381 ++++ src/mo_optimization/gradient_opt.irp.f | 346 ++++ src/mo_optimization/hessian_list_opt.irp.f | 1129 ++++++++++++ src/mo_optimization/hessian_opt.irp.f | 1043 +++++++++++ src/mo_optimization/my_providers.irp.f | 141 ++ src/mo_optimization/orb_opt.irp.f | 22 + src/mo_optimization/org/TANGLE_org_mode.sh | 7 + src/mo_optimization/org/TODO.org | 17 + .../org/debug_gradient_list_opt.org | 79 + .../org/debug_gradient_opt.org | 77 + .../org/debug_hessian_list_opt.org | 148 ++ src/mo_optimization/org/debug_hessian_opt.org | 172 ++ .../org/diagonal_hessian_list_opt.org | 1561 +++++++++++++++++ .../org/diagonal_hessian_opt.org | 1516 ++++++++++++++++ .../org/diagonalization_hessian.org | 138 ++ .../org/first_diagonal_hessian_list_opt.org | 376 ++++ .../org/first_diagonal_hessian_opt.org | 348 ++++ .../org/first_gradient_list_opt.org | 127 ++ .../org/first_gradient_opt.org | 130 ++ .../org/first_hessian_list_opt.org | 370 ++++ src/mo_optimization/org/first_hessian_opt.org | 365 ++++ src/mo_optimization/org/gradient_list_opt.org | 393 +++++ src/mo_optimization/org/gradient_opt.org | 358 ++++ src/mo_optimization/org/hessian_list_opt.org | 1141 ++++++++++++ src/mo_optimization/org/hessian_opt.org | 1056 +++++++++++ src/mo_optimization/org/my_providers.org | 308 ++++ src/mo_optimization/org/optimization.org | 91 + src/mo_optimization/org/orb_opt_trust_v2.org | 349 ++++ .../org/state_average_energy.org | 73 + .../org/state_weight_normalization.org | 31 + src/mo_optimization/org/update_parameters.org | 16 + .../org/update_st_av_ci_energy.org | 26 + .../run_orb_opt_trust_v2.irp.f | 317 ++++ src/mo_optimization/save_energy.irp.f | 9 + .../state_average_energy.irp.f | 72 + .../state_weight_normalization.irp.f | 29 + src/mo_optimization/update_parameters.irp.f | 15 + .../update_st_av_ci_energy.irp.f | 25 + 57 files changed, 18356 insertions(+) create mode 100644 src/mo_optimization/83.mo_optimization.bats create mode 100644 src/mo_optimization/EZFIO.cfg create mode 100644 src/mo_optimization/NEED create mode 100644 src/mo_optimization/README.md create mode 100644 src/mo_optimization/class.irp.f create mode 100644 src/mo_optimization/constants.h create mode 100644 src/mo_optimization/debug_gradient_list_opt.irp.f create mode 100644 src/mo_optimization/debug_gradient_opt.irp.f create mode 100644 src/mo_optimization/debug_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/debug_hessian_opt.irp.f create mode 100644 src/mo_optimization/diagonal_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/diagonal_hessian_opt.irp.f create mode 100644 src/mo_optimization/diagonalization_hessian.irp.f create mode 100644 src/mo_optimization/first_diagonal_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/first_diagonal_hessian_opt.irp.f create mode 100644 src/mo_optimization/first_gradient_list_opt.irp.f create mode 100644 src/mo_optimization/first_gradient_opt.irp.f create mode 100644 src/mo_optimization/first_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/first_hessian_opt.irp.f create mode 100644 src/mo_optimization/gradient_list_opt.irp.f create mode 100644 src/mo_optimization/gradient_opt.irp.f create mode 100644 src/mo_optimization/hessian_list_opt.irp.f create mode 100644 src/mo_optimization/hessian_opt.irp.f create mode 100644 src/mo_optimization/my_providers.irp.f create mode 100644 src/mo_optimization/orb_opt.irp.f create mode 100755 src/mo_optimization/org/TANGLE_org_mode.sh create mode 100644 src/mo_optimization/org/TODO.org create mode 100644 src/mo_optimization/org/debug_gradient_list_opt.org create mode 100644 src/mo_optimization/org/debug_gradient_opt.org create mode 100644 src/mo_optimization/org/debug_hessian_list_opt.org create mode 100644 src/mo_optimization/org/debug_hessian_opt.org create mode 100644 src/mo_optimization/org/diagonal_hessian_list_opt.org create mode 100644 src/mo_optimization/org/diagonal_hessian_opt.org create mode 100644 src/mo_optimization/org/diagonalization_hessian.org create mode 100644 src/mo_optimization/org/first_diagonal_hessian_list_opt.org create mode 100644 src/mo_optimization/org/first_diagonal_hessian_opt.org create mode 100644 src/mo_optimization/org/first_gradient_list_opt.org create mode 100644 src/mo_optimization/org/first_gradient_opt.org create mode 100644 src/mo_optimization/org/first_hessian_list_opt.org create mode 100644 src/mo_optimization/org/first_hessian_opt.org create mode 100644 src/mo_optimization/org/gradient_list_opt.org create mode 100644 src/mo_optimization/org/gradient_opt.org create mode 100644 src/mo_optimization/org/hessian_list_opt.org create mode 100644 src/mo_optimization/org/hessian_opt.org create mode 100644 src/mo_optimization/org/my_providers.org create mode 100644 src/mo_optimization/org/optimization.org create mode 100644 src/mo_optimization/org/orb_opt_trust_v2.org create mode 100644 src/mo_optimization/org/state_average_energy.org create mode 100644 src/mo_optimization/org/state_weight_normalization.org create mode 100644 src/mo_optimization/org/update_parameters.org create mode 100644 src/mo_optimization/org/update_st_av_ci_energy.org create mode 100644 src/mo_optimization/run_orb_opt_trust_v2.irp.f create mode 100644 src/mo_optimization/save_energy.irp.f create mode 100644 src/mo_optimization/state_average_energy.irp.f create mode 100644 src/mo_optimization/state_weight_normalization.irp.f create mode 100644 src/mo_optimization/update_parameters.irp.f create mode 100644 src/mo_optimization/update_st_av_ci_energy.irp.f diff --git a/src/mo_optimization/83.mo_optimization.bats b/src/mo_optimization/83.mo_optimization.bats new file mode 100644 index 00000000..5bc3d313 --- /dev/null +++ b/src/mo_optimization/83.mo_optimization.bats @@ -0,0 +1,62 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh=2e-3 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -a + qp run scf + qp set_frozen_core + qp set determinants n_states 2 + qp set determinants read_wf true + qp set mo_two_e_ints io_mo_two_e_integrals None + file="$(echo $1 | sed 's/.ezfio//g')" + qp run cis + qp run debug_gradient_list_opt > $file.debug_g.out + err3="$(grep 'Max error:' $file.debug_g.out | awk '{print $3}')" + qp run debug_hessian_list_opt > $file.debug_h1.out + err1="$(grep 'Max error:' $file.debug_h1.out | awk '{print $3}')" + qp run orb_opt > $file.opt1.out + energy1="$(grep 'State average energy:' $file.opt1.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method diag + qp reset -d + qp run scf + qp run cis + qp run debug_hessian_list_opt > $file.debug_h2.out + err2="$(grep 'Max error_H:' $file.debug_h2.out | awk '{print $3}')" + qp run orb_opt > $file.opt2.out + energy2="$(grep 'State average energy:' $file.opt2.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method full + qp reset -d + qp run scf + eq $energy1 $2 $thresh + eq $energy2 $3 $thresh + eq $err1 0.0 1e-12 + eq $err2 0.0 1e-12 + eq $err3 0.0 1e-12 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -48.9852901484277 -48.9852937541510 +} + +@test "h2o" { +run h2o.ezfio -75.9025622449206 -75.8691844585879 +} + +@test "h2s" { +run h2s.ezfio -398.576255809878 -398.574145943928 +} + +@test "hbo" { +run hbo.ezfio -99.9234823022109 -99.9234763597840 +} + +@test "hco" { +run hco.ezfio -113.204915552241 -113.204905207050 +} diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg new file mode 100644 index 00000000..8944e507 --- /dev/null +++ b/src/mo_optimization/EZFIO.cfg @@ -0,0 +1,29 @@ +[optimization_method] +type: character*(32) +doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian +interface: ezfio,provider,ocaml +default: full + +[n_det_start] +type: integer +doc: Number of determinants after which the orbital optimization will start, n_det_start must be greater than 1. The algorithm does a cipsi until n_det > n_det_start and the optimization starts after +interface: ezfio,provider,ocaml +default: 5 + +[n_det_max_opt] +type: integer +doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) +interface: ezfio,provider,ocaml +default: 200000 + +[optimization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital optimization +interface: ezfio,provider,ocaml +default: 20 + +[thresh_opt_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the optimization exits when the biggest element in the gradient is smaller than thresh_optimization_max_elem_grad +interface: ezfio,provider,ocaml +default: 1.e-5 diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED new file mode 100644 index 00000000..91f41ee3 --- /dev/null +++ b/src/mo_optimization/NEED @@ -0,0 +1,7 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +selectors_full +generators_full +utils_trust_region diff --git a/src/mo_optimization/README.md b/src/mo_optimization/README.md new file mode 100644 index 00000000..94f29aee --- /dev/null +++ b/src/mo_optimization/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization the ezfio contains the optimized orbitals + +## For a fixed number of determinants +To optimize the MOs for the actual determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e, with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: +(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cispi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process if the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation + +### End of the optimization +You can choos the number of determinants after what the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the differents states directly in qp edit. +It will affect ths weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/class.irp.f b/src/mo_optimization/class.irp.f new file mode 100644 index 00000000..b4a68ac2 --- /dev/null +++ b/src/mo_optimization/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the FCI case, all those are always false + END_DOC + do_only_1h1p = .False. + do_only_cas = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/mo_optimization/constants.h b/src/mo_optimization/constants.h new file mode 100644 index 00000000..1cd00bda --- /dev/null +++ b/src/mo_optimization/constants.h @@ -0,0 +1 @@ + logical, parameter :: debug=.False. diff --git a/src/mo_optimization/debug_gradient_list_opt.irp.f b/src/mo_optimization/debug_gradient_list_opt.irp.f new file mode 100644 index 00000000..867e0105 --- /dev/null +++ b/src/mo_optimization/debug_gradient_list_opt.irp.f @@ -0,0 +1,78 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_gradient_opt.irp.f b/src/mo_optimization/debug_gradient_opt.irp.f new file mode 100644 index 00000000..8aeec18f --- /dev/null +++ b/src/mo_optimization/debug_gradient_opt.irp.f @@ -0,0 +1,76 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_hessian_list_opt.irp.f b/src/mo_optimization/debug_hessian_list_opt.irp.f new file mode 100644 index 00000000..d1aa79c4 --- /dev/null +++ b/src/mo_optimization/debug_hessian_list_opt.irp.f @@ -0,0 +1,147 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | +! | optimization_method | string | Method for the orbital optimization: | +! | | | - 'full' -> full hessian | +! | | | - 'diag' -> diagonal hessian | +! | dim_list_act_orb | integer | number of active MOs | +! | list_act(dim_list_act_orb) | integer | list of the actives MOs | +! | | | | + +! Internal: +! | m | integer | number of MOs in the list | +! | | | (active MOs) | +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program diff --git a/src/mo_optimization/debug_hessian_opt.irp.f b/src/mo_optimization/debug_hessian_opt.irp.f new file mode 100644 index 00000000..6d22cc01 --- /dev/null +++ b/src/mo_optimization/debug_hessian_opt.irp.f @@ -0,0 +1,171 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization/diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..fe54fa7a --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_list_opt.irp.f @@ -0,0 +1,1556 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) + +! Initialization of the arrays + +!!$OMP DO +!do tmp_s = 1,m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3, tmp_2rdm_3) + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do tmp_p = 1, m +! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_s = 1, m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m + +! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & +! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do tmp_pq = 1, n +! write(*,'(100(F10.5))') H(tmp_pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization/diagonal_hessian_opt.irp.f new file mode 100644 index 00000000..7688ec37 --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_opt.irp.f @@ -0,0 +1,1511 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) + +! Initialization of the arrays + +!!$OMP DO +!do s = 1,mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,r,s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_2rdm_3,tmp_bi_int_3) +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do p = 1, mo_num +! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do s = 1, mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num + +! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do pq = 1, n +! write(*,'(100(F10.5))') H(pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization/diagonalization_hessian.irp.f new file mode 100644 index 00000000..e25879d9 --- /dev/null +++ b/src/mo_optimization/diagonalization_hessian.irp.f @@ -0,0 +1,136 @@ +! Diagonalization of the hessian + +! Just a matrix diagonalization using Lapack + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | H(n,n) | double precision | hessian | + +! Output: +! | e_val(n) | double precision | eigenvalues of the hessian | +! | w(n,n) | double precision | eigenvectors of the hessian | + +! Internal: +! | nb_negative_nv | integer | number of negative eigenvalues | +! | lwork | integer | for Lapack | +! | work(lwork,n) | double precision | temporary array for Lapack | +! | info | integer | if 0 -> ok, else problem in the diagonalization | +! | i,j | integer | dummy indexes | + + +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..58536993 --- /dev/null +++ b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f @@ -0,0 +1,372 @@ +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization/gradient_opt.irp.f new file mode 100644 index 00000000..25be6b5a --- /dev/null +++ b/src/mo_optimization/gradient_opt.irp.f @@ -0,0 +1,346 @@ +! Gradient + +! The gradient of the CI energy with respects to the orbital rotation +! is: +! (C-c C-x C-l) +! $$ +! G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] +! $$ + + +! $$ +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! $$ + +! $$ +! G(p,q) = \left[ +! \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] - +! \left[ +! \sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +! \sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +! \Gamma_{qt}^{rs}) +! \right] +! $$ + +! Where p,q,r,s,t are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +! E. Scuseria + +! *Compute the gradient of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +! | two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | v_grad(n) | double precision | the gradient | +! | max_elem | double precision | maximum element of the gradient | + +! Internal: +! | grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +! | A((mo_num,mo_num) | doubre precision | gradient after the permutations | +! | norm | double precision | norm of the gradient | +! | p, q | integer | indexes of the element in the matrix grad | +! | i | integer | index for the tranformation in a vector | +! | r, s, t | integer | indexes dor the sums | +! | t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +! | t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | +! | dnrm2 | double precision | (Lapack) norm | + + +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) + +! Initialization + +!$OMP DO +do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +! Term 1 + +! Without optimization the term 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! grad(p,q) = grad(p,q) & +! + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) +! enddo +! enddo +! enddo + +! Since the matrix multiplication A.B is defined like : +! \begin{equation} +! c_{ij} = \sum_k a_{ik}.b_{kj} +! \end{equation} +! The previous equation can be rewritten as a matrix multplication + + +!**************** +! Opt first term +!**************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& +mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient, first term (s) :', t6 +!$OMP END MASTER + +! Term 2 + +! Without optimization the second term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num +! do t= 1, mo_num + +! grad(p,q) = grad(p,q) & +! + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & +! - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) +! enddo +! enddo +! enddo +! enddo +! enddo + +! Using the bielectronic integral properties : +! get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +! Using the two body matrix properties : +! two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +! t is one the right, we can put it on the external loop and create 3 +! indexes temporary array +! r,s can be seen as one index + +! By doing so, a matrix multiplication appears + + +!***************** +! Opt second term +!***************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient second term (s) : ', t6 +!$OMP END MASTER + +! Deallocation of private arrays + +deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + +!$OMP END PARALLEL + +call omp_set_max_active_levels(4) + +! Permutation, 2D matrix -> vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization/hessian_list_opt.irp.f new file mode 100644 index 00000000..31af769a --- /dev/null +++ b/src/mo_optimization/hessian_list_opt.irp.f @@ -0,0 +1,1129 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo +enddo +!$OMP END DO + + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + + + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization/hessian_opt.irp.f new file mode 100644 index 00000000..0b4312c6 --- /dev/null +++ b/src/mo_optimization/hessian_opt.irp.f @@ -0,0 +1,1043 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f new file mode 100644 index 00000000..7469ffd5 --- /dev/null +++ b/src/mo_optimization/my_providers.irp.f @@ -0,0 +1,141 @@ +! Dimensions of MOs + + +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER + +! Energies/criterions + +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER + +! With all the MOs + +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER + +! With the list of active MOs +! Can be generalized to any mo_class by changing the list/dimension + +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER diff --git a/src/mo_optimization/orb_opt.irp.f b/src/mo_optimization/orb_opt.irp.f new file mode 100644 index 00000000..71ff9262 --- /dev/null +++ b/src/mo_optimization/orb_opt.irp.f @@ -0,0 +1,22 @@ +! Orbital optimization program + +! This is an optimization program for molecular orbitals. It produces +! orbital rotations in order to lower the energy of a truncated wave +! function. +! This program just optimize the orbitals for a fixed number of +! determinants. This optimization process must be repeated for different +! number of determinants. + + + + +! Main program : orb_opt_trust + + +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end diff --git a/src/mo_optimization/org/TANGLE_org_mode.sh b/src/mo_optimization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_optimization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization/org/TODO.org new file mode 100644 index 00000000..960b9ba6 --- /dev/null +++ b/src/mo_optimization/org/TODO.org @@ -0,0 +1,17 @@ +TODO: +** TODO Keep under surveillance the performance of rotation matrix +- is the fix ok ? +** DONE Provider state_average_weight +** DONE Diagonal hessian for orbital optimization with a list of MOs +** DONE Something to force the step cancellation if R.R^T > treshold +** TODO Iterative method to compute the rotation matrix +- doesn't work actually +** DONE Test trust region with polynomial functions +** DONE Optimization/Localization program using the template +** DONE Correction OMP hessian shared/private arrays +** DONE State average energy +** DONE Correction of Rho +** TODO Check the PROVIDE/FREE/TOUCH +** TODO research of lambda without the power 2 +** DONE Clean the OMP sections + diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization/org/debug_gradient_list_opt.org new file mode 100644 index 00000000..3c6f98c0 --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_list_opt.org @@ -0,0 +1,79 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization/org/debug_gradient_opt.org new file mode 100644 index 00000000..101e1e8c --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_opt.org @@ -0,0 +1,77 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization/org/debug_hessian_list_opt.org new file mode 100644 index 00000000..76e8b337 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_list_opt.org @@ -0,0 +1,148 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | +| optimization_method | string | Method for the orbital optimization: | +| | | - 'full' -> full hessian | +| | | - 'diag' -> diagonal hessian | +| dim_list_act_orb | integer | number of active MOs | +| list_act(dim_list_act_orb) | integer | list of the actives MOs | +| | | | + +Internal: +| m | integer | number of MOs in the list | +| | | (active MOs) | +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization/org/debug_hessian_opt.org new file mode 100644 index 00000000..40f84c82 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_opt.org @@ -0,0 +1,172 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization/org/diagonal_hessian_list_opt.org new file mode 100644 index 00000000..a12ca981 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_list_opt.org @@ -0,0 +1,1561 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_s = 1,m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3, tmp_2rdm_3) +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_p = 1, m + ! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_s = 1, m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + + ! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + ! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do tmp_pq = 1, n + ! write(*,'(100(F10.5))') H(tmp_pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization/org/diagonal_hessian_opt.org new file mode 100644 index 00000000..efd75065 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_opt.org @@ -0,0 +1,1516 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do s = 1,mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,r,s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + deallocate(tmp_2rdm_3,tmp_bi_int_3) + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do p = 1, mo_num + ! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do s = 1, mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + + ! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do pq = 1, n + ! write(*,'(100(F10.5))') H(pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization/org/diagonalization_hessian.org new file mode 100644 index 00000000..5eed7dd5 --- /dev/null +++ b/src/mo_optimization/org/diagonalization_hessian.org @@ -0,0 +1,138 @@ +* Diagonalization of the hessian + +Just a matrix diagonalization using Lapack + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| H(n,n) | double precision | hessian | + +Output: +| e_val(n) | double precision | eigenvalues of the hessian | +| w(n,n) | double precision | eigenvectors of the hessian | + +Internal: +| nb_negative_nv | integer | number of negative eigenvalues | +| lwork | integer | for Lapack | +| work(lwork,n) | double precision | temporary array for Lapack | +| info | integer | if 0 -> ok, else problem in the diagonalization | +| i,j | integer | dummy indexes | + +#+BEGIN_SRC f90 :comments org :tangle diagonalization_hessian.irp.f +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org new file mode 100644 index 00000000..391c6033 --- /dev/null +++ b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org @@ -0,0 +1,376 @@ +* First diagonal hessian + +#+BEGIN_SRC f90 :comments :tangle first_diagonal_hessian_list_opt.irp.f +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization/org/gradient_opt.org new file mode 100644 index 00000000..45c761e9 --- /dev/null +++ b/src/mo_optimization/org/gradient_opt.org @@ -0,0 +1,358 @@ +* Gradient + +The gradient of the CI energy with respects to the orbital rotation +is: +(C-c C-x C-l) +$$ +G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] +$$ + + +$$ +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +$$ + +$$ +G(p,q) = \left[ +\sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] - +\left[ +\sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +\sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +\Gamma_{qt}^{rs}) +\right] +$$ + +Where p,q,r,s,t are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +E. Scuseria + +*Compute the gradient of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| v_grad(n) | double precision | the gradient | +| max_elem | double precision | maximum element of the gradient | + +Internal: +| grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +| A((mo_num,mo_num) | doubre precision | gradient after the permutations | +| norm | double precision | norm of the gradient | +| p, q | integer | indexes of the element in the matrix grad | +| i | integer | index for the tranformation in a vector | +| r, s, t | integer | indexes dor the sums | +| t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +| t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | +| dnrm2 | double precision | (Lapack) norm | + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) +#+END_SRC + +** Calculation +*** Initialization +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo + enddo + !$OMP END DO +#+END_SRC + +*** Term 1 + +Without optimization the term 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + grad(p,q) = grad(p,q) & + + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) + enddo + enddo +enddo + +Since the matrix multiplication A.B is defined like : +\begin{equation} +c_{ij} = \sum_k a_{ik}.b_{kj} +\end{equation} +The previous equation can be rewritten as a matrix multplication + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !**************** + ! Opt first term + !**************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& + mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient, first term (s) :', t6 + !$OMP END MASTER +#+END_SRC + +*** Term 2 + +Without optimization the second term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + do t= 1, mo_num + + grad(p,q) = grad(p,q) & + + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & + - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) + enddo + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +Using the two body matrix properties : +two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +t is one the right, we can put it on the external loop and create 3 +indexes temporary array +r,s can be seen as one index + +By doing so, a matrix multiplication appears + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !***************** + ! Opt second term + !***************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient second term (s) : ', t6 + !$OMP END MASTER +#+END_SRC + +*** Deallocation of private arrays +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) +#+END_SRC + +*** Permutation, 2D matrix -> vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(grad,A) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization/org/hessian_list_opt.org new file mode 100644 index 00000000..3df461cf --- /dev/null +++ b/src/mo_optimization/org/hessian_list_opt.org @@ -0,0 +1,1141 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo + enddo + !$OMP END DO + + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization/org/hessian_opt.org new file mode 100644 index 00000000..5b0642e3 --- /dev/null +++ b/src/mo_optimization/org/hessian_opt.org @@ -0,0 +1,1056 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization/org/my_providers.org new file mode 100644 index 00000000..b88cbd62 --- /dev/null +++ b/src/mo_optimization/org/my_providers.org @@ -0,0 +1,308 @@ +* Providers +** Dimensions of MOs + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER +#+END_SRC + +** Energies/criterions +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER +#+END_SRC + +** Gradient/hessian +*** Orbital optimization +**** With all the MOs +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER +#+END_SRC + +**** With the list of active MOs +Can be generalized to any mo_class by changing the list/dimension +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER +#+END_SRC + +*** Orbital localization +**** Gradient +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_core, (n_mo_dim_core) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_core ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the core MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_gradient_loc_core, my_CC_loc_core , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_act, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_act ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the active MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_loc_act, my_CC_loc_act , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org ! +:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_inact, (n_mo_dim_inact) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_inact ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the inactive MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_gradient_loc_inact, my_CC_loc_inact , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_virt, (n_mo_dim_virt) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_virt ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the virtual MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_gradient_loc_virt, my_CC_loc_virt , norm_grad) + +END_PROVIDER +#+END_SRC + +**** Hessian +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_core, (n_mo_dim_core) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the core MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_hessian_loc_core) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_act, (n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the active MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_loc_act) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_inact, (n_mo_dim_inact) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the inactive MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_hessian_loc_inact) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_virt, (n_mo_dim_virt) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the virtual MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_hessian_loc_virt) + +END_PROVIDER +#+END_SRC + diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization/org/optimization.org new file mode 100644 index 00000000..cbb7b752 --- /dev/null +++ b/src/mo_optimization/org/optimization.org @@ -0,0 +1,91 @@ +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end + +#+END_SRC diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization/org/orb_opt_trust_v2.org new file mode 100644 index 00000000..dbcd3c19 --- /dev/null +++ b/src/mo_optimization/org/orb_opt_trust_v2.org @@ -0,0 +1,349 @@ +* Orbital optimization program + +This is an optimization program for molecular orbitals. It produces +orbital rotations in order to lower the energy of a truncated wave +function. +This program just optimize the orbitals for a fixed number of +determinants. This optimization process must be repeated for different +number of determinants. + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +#+END_SRC + +* Main program : orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end +#+END_SRC + +* Subroutine : run_orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + +#+END_SRC + +Subroutine to optimize the MOs using a trust region algorithm: +- choice of the method +- initialization +- optimization until convergence + +The optimization use the trust region algorithm, the different parts +are explained in the corresponding subroutine files. + +qp_edit: +| thresh_opt_max_elem_grad | +| optimization_max_nb_iter | +| optimization_method | + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| N_states | integer | number of states | +| ci_energy(N_states) | double precision | CI energies | +| state_average_weight(N_states) | double precision | Weight of the different states | + +Variables: +| m | integer | number of active MOs | +| tmp_n | integer | m*(m-1)/2, number of MO parameters | +| tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(tmp_n) | double precision | gradient | +| H(tmp_n,tmp_n) | double precision | hessian (2D) | +| h_f(m,m,m,m) | double precision | hessian (4D) | +| e_val(m) | double precision | eigenvalues of the hessian | +| w(m,m) | double precision | eigenvectors of the hessian | +| x(m) | double precision | step given by the trust region | +| m_x(m,m) | double precision | step given by the trust region after | +| tmp_R(m,m) | double precision | rotation matrix for active MOs | +| R(mo_num,mo_num) | double precision | full rotation matrix | +| prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +| new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +| delta | double precision | radius of the trust region | +| rho | double precision | agreement between the model and the exact function | +| max_elem | double precision | maximum element in the gradient | +| i | integer | index | +| tmp_i,tmp_j | integer | indexes in the subspace containing only | +| | | the active MOs | +| converged | logical | convergence of the algorithm | +| cancel_step | logical | if the step must be cancelled | +| nb_iter | integer | number of iterations (accepted) | +| nb_diag | integer | number of diagonalizations of the CI matrix | +| nb_cancel | integer | number of cancelled steps for the actual iteration | +| nb_cancel_tot | integer | total number of cancel steps | +| info | integer | if 0 ok, else problem in the diagonalization of | +| | | the hessian with the Lapack routine | +| criterion | double precision | energy at a given step | +| prev_criterion | double precision | energy before the rotation | +| criterion_model | double precision | estimated energy after the rotation using | +| | | a Taylor series | +| must_exit | logical | To exit the trust region algorithm when | +| | | criterion - criterion_model is too small | +| enforce_step_cancellation | logical | To force the cancellation of the step if the | +| | | error in the rotation matrix is too large | + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef +#+END_SRC + +** Allocation + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + allocate(R(mo_num,mo_num)) ! rotation matrix + allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + + ! Definition of m and tmp_n + m = dim_list_act_orb + tmp_n = m*(m-1)/2 + + allocate(tmp_list(m)) + allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) + allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +#+END_SRC + +** Method + There are three different methods : + - the "full" hessian, which uses all the elements of the hessian + matrix" + - the "diagonal" hessian, which uses only the diagonal elements of the + hessian + - without the hessian (hessian = identity matrix) + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + !Display the method + print*, 'Method :', optimization_method + if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n + elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + else + print*,'Unknown optimization_method, please select full, diag or none' + call abort + endif + print*, 'Absolute value of the hessian:', absolute_eig +#+END_SRC + +** Calculations +*** Algorithm + +Here is the main algorithm of the optimization: +- First of all we initialize some parameters and we compute the + criterion (the ci energy) before doing any MO rotations +- We compute the gradient and the hessian for the active MOs +- We diagonalize the hessian +- We compute a step and loop to reduce the radius of the + trust region (and the size of the step by the way) until the step is + accepted +- We repeat the process until the convergence + NB: the convergence criterion can be changed + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + ! Loop until the convergence of the optimization + ! call diagonalize_ci + + !### Initialization ### + nb_iter = 0 + rho = 0.5d0 + not_converged = .True. + tmp_list = list_act ! Optimization of the active MOs + nb_cancel_tot = 0 + + ! Renormalization of the weights of the states + call state_weight_normalization + + ! Compute the criterion before the loop + call state_average_energy(prev_criterion) + + do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif + enddo + +#+END_SRC + +** Deallocation, end + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end +#+END_SRC + diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization/org/state_average_energy.org new file mode 100644 index 00000000..b779845a --- /dev/null +++ b/src/mo_optimization/org/state_average_energy.org @@ -0,0 +1,73 @@ +* State average energy + +Calculation of the state average energy from the integrals and the +density matrices. + +\begin{align*} +E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +\end{align*} +$h_{ij}$: mono-electronic integral +$\gamma_{ij}$: one electron density matrix +$v_{ij}^{kl}$: bi-electronic integral +$\Gamma_{ij}^{kl}$: two electrons density matrix + +TODO: OMP version + +PROVIDED: +| mo_one_e_integrals | double precision | mono-electronic integrals | +| get_two_e_integral | double precision | bi-electronic integrals | +| one_e_dm_mo | double precision | one electron density matrix | +| two_e_dm_mo | double precision | two electrons density matrix | +| nuclear_repulsion | double precision | nuclear repulsion | +| mo_num | integer | number of MOs | + +Output: +| energy | double precision | state average energy | + +Internal: +| mono_e | double precision | mono-electronic energy | +| bi_e | double precision | bi-electronic energy | +| i,j,k,l | integer | indexes to loop over the MOs | + +#+BEGIN_SRC f90 :comments org :tangle state_average_energy.irp.f +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end +#+END_SRC diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization/org/state_weight_normalization.org new file mode 100644 index 00000000..492ad3d4 --- /dev/null +++ b/src/mo_optimization/org/state_weight_normalization.org @@ -0,0 +1,31 @@ +#+BEGIN_SRC f90 :comments org :tangle state_weight_normalization.irp.f +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end +#+END_SRC diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization/org/update_parameters.org new file mode 100644 index 00000000..cd9b9595 --- /dev/null +++ b/src/mo_optimization/org/update_parameters.org @@ -0,0 +1,16 @@ +Subroutine toupdate the parameters. +Ex: TOUCH mo_coef ... + +#+BEGIN_SRC f90 :comments org :tangle update_parameters.irp.f +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end +#+END_SRC diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization/org/update_st_av_ci_energy.org new file mode 100644 index 00000000..2dc7f3ee --- /dev/null +++ b/src/mo_optimization/org/update_st_av_ci_energy.org @@ -0,0 +1,26 @@ +* Update the CI state average energy + +Computes the state average energy +\begin{align*} +E =\sum_{i=1}^{N_{states}} E_i . w_i +\end{align*} + +$E_i$: energy of state i +$w_i$: weight of state i + +#+BEGIN_SRC f90 :comments org :tangle update_st_av_ci_energy.irp.f +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end +#+END_SRC diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization/run_orb_opt_trust_v2.irp.f new file mode 100644 index 00000000..e1431255 --- /dev/null +++ b/src/mo_optimization/run_orb_opt_trust_v2.irp.f @@ -0,0 +1,317 @@ +! Subroutine : run_orb_opt_trust + + + + + + +! Subroutine to optimize the MOs using a trust region algorithm: +! - choice of the method +! - initialization +! - optimization until convergence + +! The optimization use the trust region algorithm, the different parts +! are explained in the corresponding subroutine files. + +! qp_edit: +! | thresh_opt_max_elem_grad | +! | optimization_max_nb_iter | +! | optimization_method | + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | N_states | integer | number of states | +! | ci_energy(N_states) | double precision | CI energies | +! | state_average_weight(N_states) | double precision | Weight of the different states | + +! Variables: +! | m | integer | number of active MOs | +! | tmp_n | integer | m*(m-1)/2, number of MO parameters | +! | tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(tmp_n) | double precision | gradient | +! | H(tmp_n,tmp_n) | double precision | hessian (2D) | +! | h_f(m,m,m,m) | double precision | hessian (4D) | +! | e_val(m) | double precision | eigenvalues of the hessian | +! | w(m,m) | double precision | eigenvectors of the hessian | +! | x(m) | double precision | step given by the trust region | +! | m_x(m,m) | double precision | step given by the trust region after | +! | tmp_R(m,m) | double precision | rotation matrix for active MOs | +! | R(mo_num,mo_num) | double precision | full rotation matrix | +! | prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +! | new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +! | delta | double precision | radius of the trust region | +! | rho | double precision | agreement between the model and the exact function | +! | max_elem | double precision | maximum element in the gradient | +! | i | integer | index | +! | tmp_i,tmp_j | integer | indexes in the subspace containing only | +! | | | the active MOs | +! | converged | logical | convergence of the algorithm | +! | cancel_step | logical | if the step must be cancelled | +! | nb_iter | integer | number of iterations (accepted) | +! | nb_diag | integer | number of diagonalizations of the CI matrix | +! | nb_cancel | integer | number of cancelled steps for the actual iteration | +! | nb_cancel_tot | integer | total number of cancel steps | +! | info | integer | if 0 ok, else problem in the diagonalization of | +! | | | the hessian with the Lapack routine | +! | criterion | double precision | energy at a given step | +! | prev_criterion | double precision | energy before the rotation | +! | criterion_model | double precision | estimated energy after the rotation using | +! | | | a Taylor series | +! | must_exit | logical | To exit the trust region algorithm when | +! | | | criterion - criterion_model is too small | +! | enforce_step_cancellation | logical | To force the cancellation of the step if the | +! | | | error in the rotation matrix is too large | + + +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef + +! Allocation + + +allocate(R(mo_num,mo_num)) ! rotation matrix +allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + +! Definition of m and tmp_n +m = dim_list_act_orb +tmp_n = m*(m-1)/2 + +allocate(tmp_list(m)) +allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) +allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +! Method +! There are three different methods : +! - the "full" hessian, which uses all the elements of the hessian +! matrix" +! - the "diagonal" hessian, which uses only the diagonal elements of the +! hessian +! - without the hessian (hessian = identity matrix) + + +!Display the method + print*, 'Method :', optimization_method +if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n +elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +else + print*,'Unknown optimization_method, please select full, diag or none' + call abort +endif +print*, 'Absolute value of the hessian:', absolute_eig + +! Algorithm + +! Here is the main algorithm of the optimization: +! - First of all we initialize some parameters and we compute the +! criterion (the ci energy) before doing any MO rotations +! - We compute the gradient and the hessian for the active MOs +! - We diagonalize the hessian +! - We compute a step and loop to reduce the radius of the +! trust region (and the size of the step by the way) until the step is +! accepted +! - We repeat the process until the convergence +! NB: the convergence criterion can be changed + + +! Loop until the convergence of the optimization +! call diagonalize_ci + +!### Initialization ### +nb_iter = 0 +rho = 0.5d0 +not_converged = .True. +tmp_list = list_act ! Optimization of the active MOs +nb_cancel_tot = 0 + +! Renormalization of the weights of the states +call state_weight_normalization + +! Compute the criterion before the loop +call state_average_energy(prev_criterion) + +do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif +enddo + +! Deallocation, end + + +deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization/save_energy.irp.f new file mode 100644 index 00000000..5dac8da9 --- /dev/null +++ b/src/mo_optimization/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_fci_energy(E(1:N_states)) + call ezfio_set_fci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f new file mode 100644 index 00000000..2cd063da --- /dev/null +++ b/src/mo_optimization/state_average_energy.irp.f @@ -0,0 +1,72 @@ +! State average energy + +! Calculation of the state average energy from the integrals and the +! density matrices. + +! \begin{align*} +! E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +! \end{align*} +! $h_{ij}$: mono-electronic integral +! $\gamma_{ij}$: one electron density matrix +! $v_{ij}^{kl}$: bi-electronic integral +! $\Gamma_{ij}^{kl}$: two electrons density matrix + +! TODO: OMP version + +! PROVIDED: +! | mo_one_e_integrals | double precision | mono-electronic integrals | +! | get_two_e_integral | double precision | bi-electronic integrals | +! | one_e_dm_mo | double precision | one electron density matrix | +! | two_e_dm_mo | double precision | two electrons density matrix | +! | nuclear_repulsion | double precision | nuclear repulsion | +! | mo_num | integer | number of MOs | + +! Output: +! | energy | double precision | state average energy | + +! Internal: +! | mono_e | double precision | mono-electronic energy | +! | bi_e | double precision | bi-electronic energy | +! | i,j,k,l | integer | indexes to loop over the MOs | + + +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization/state_weight_normalization.irp.f new file mode 100644 index 00000000..27d30af7 --- /dev/null +++ b/src/mo_optimization/state_weight_normalization.irp.f @@ -0,0 +1,29 @@ +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization/update_parameters.irp.f new file mode 100644 index 00000000..88e8fc34 --- /dev/null +++ b/src/mo_optimization/update_parameters.irp.f @@ -0,0 +1,15 @@ +! Subroutine toupdate the parameters. +! Ex: TOUCH mo_coef ... + + +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization/update_st_av_ci_energy.irp.f new file mode 100644 index 00000000..18b72502 --- /dev/null +++ b/src/mo_optimization/update_st_av_ci_energy.irp.f @@ -0,0 +1,25 @@ +! Update the CI state average energy + +! Computes the state average energy +! \begin{align*} +! E =\sum_{i=1}^{N_{states}} E_i . w_i +! \end{align*} + +! $E_i$: energy of state i +! $w_i$: weight of state i + + +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end From f228b0a3a477fa10d7b8194f43ae1615d1200bbc Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 21 Apr 2023 13:43:49 +0200 Subject: [PATCH 08/29] missing program --- src/mo_optimization/optimization.irp.f | 86 ++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/mo_optimization/optimization.irp.f diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/optimization.irp.f new file mode 100644 index 00000000..9892b3e3 --- /dev/null +++ b/src/mo_optimization/optimization.irp.f @@ -0,0 +1,86 @@ +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end + +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end From 20d057b7d1c046ef6872d6713e57511e4adc311e Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 21 Apr 2023 13:44:51 +0200 Subject: [PATCH 09/29] unecessary parameter --- src/mo_optimization/EZFIO.cfg | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index 8944e507..e6aa2d67 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -4,12 +4,6 @@ doc: Define the kind of hessian for the orbital optimization full : full hessian interface: ezfio,provider,ocaml default: full -[n_det_start] -type: integer -doc: Number of determinants after which the orbital optimization will start, n_det_start must be greater than 1. The algorithm does a cipsi until n_det > n_det_start and the optimization starts after -interface: ezfio,provider,ocaml -default: 5 - [n_det_max_opt] type: integer doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) From c96e7c754e609fbc7391451a2d6e9588997b5958 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 5 May 2023 12:25:37 +0200 Subject: [PATCH 10/29] mo_num -> n_core_inact_act_orb in RDMs --- src/two_body_rdm/two_e_dm_mo.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 7e35fc7b..6bd115a2 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,13 +16,13 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate - do l=1,mo_num + do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) - do k=1,mo_num + do k=1,n_core_inact_act_orb korb = list_core_inact_act(k) - do j=1,mo_num + do j=1,n_core_inact_act_orb jorb = list_core_inact_act(j) - do i=1,mo_num + do i=1,n_core_inact_act_orb iorb = list_core_inact_act(i) two_e_dm_mo(iorb,jorb,korb,lorb) = state_av_full_occ_2_rdm_spin_trace_mo(i,j,k,l) enddo From f314c5abc291144eab0d76a591e73166ce90fa05 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 11:14:57 +0200 Subject: [PATCH 11/29] Added qp_json --- scripts/qp_exc_energy.py | 18 ++++++----- scripts/utility/qp_json.py | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 scripts/utility/qp_json.py diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py index ba9d7917..7e7f1d67 100755 --- a/scripts/qp_exc_energy.py +++ b/scripts/qp_exc_energy.py @@ -42,13 +42,15 @@ import sys, os import scipy import scipy.stats from math import sqrt, gamma, exp -import json +import qp_json -def read_data(filename,state): +def read_data(ezfio_filename,state): """ Read energies and PT2 from input file """ - with open(filename,'r') as f: - lines = json.load(f)['fci'] + data = qp_json.load_last(ezfio_filename) + for method in data.keys(): + x = data[method] + lines = x print(f"State: {state}") @@ -138,15 +140,15 @@ def compute(data): return mu, err, bias, p -filename = sys.argv[1] -print(filename) +ezfio_filename = sys.argv[1] +print(ezfio_filename) if len(sys.argv) > 2: state = int(sys.argv[2]) else: state = 1 -data = read_data(filename,state) +data = read_data(ezfio_filename,state) mu, err, bias, _ = compute(data) -print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err)) +print(" %s: %8.3f +/- %5.3f eV\n"%(ezfio_filename, mu, err)) import numpy as np A = np.array( [ [ data[-1][1], 1. ], diff --git a/scripts/utility/qp_json.py b/scripts/utility/qp_json.py new file mode 100644 index 00000000..09ffe1be --- /dev/null +++ b/scripts/utility/qp_json.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +import os +import json + +def fix_json(s): + """Properly termitates an incomplete JSON file""" + + s = s.replace(' ','') + s = s.replace('\n','') + s = s.replace('\t','') + s = s.replace(",{}",'') + tmp = [ c for c in s if c in "[]{}" ] + tmp = "".join(tmp) + tmp_old = "" + while tmp != tmp_old: + tmp_old = tmp + tmp = tmp.replace("{}","") + tmp = tmp.replace("[]","") + while s[-1] in [ ',', '\n', ' ', '\t' ]: + s = s[:-1] + tmp = [ c for c in tmp ] + tmp.reverse() + for c in tmp: + if c == '[': s += "]" + elif c == '{': s += "}" + return s + + +def load(filename): + """Loads a JSON file after calling the fix_json function.""" + with open(filename,'r') as f: + data = f.read() + new_data = fix_json(data) + return json.loads(new_data) + + +def load_all(ezfio_filename): + """Loads all JSON files of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + for filename in [ x for x in os.listdir(prefix) if x.endswith(".json")]: + d[filename] = load(prefix+filename) + return d + + +def load_last(ezfio_filename): + """Loads last JSON file of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + l = [ x for x in os.listdir(prefix) if x.endswith(".json")] + l.sort() + filename = l[-1] + print(filename) + return load(prefix+filename) + + +def fix(ezfio_filename): + """Fixes all JSON files in an EZFIO.""" + d = load_all(ezfio_filename) + prefix = ezfio_filename+'/json/' + for filename in d.keys(): + with open(prefix+filename, 'w') as json_file: + json.dump(d[filename], json_file) + + + From 20bed4f44a85e5717ac535cd98360d55119518ac Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 12:22:56 +0200 Subject: [PATCH 12/29] Fix reversed print of minimum PT2 in extrapolations --- src/iterations/print_extrapolation.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f index 111429bf..a7f85693 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -25,7 +25,7 @@ subroutine print_extrapolated_energy write(*,*) 'minimum PT2 ', 'Extrapolated energy' write(*,*) '=========== ', '===================' do k=2,N_iter_p - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,k), extrapolated_energy(k,1) + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter_p+1-k), extrapolated_energy(k,1) enddo write(*,*) '=========== ', '===================' From 52da1de877934f398d76d557457a0b1ae5c8e345 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 12:54:02 +0200 Subject: [PATCH 13/29] qp_extract_cipsi_data.py uses qp_json --- scripts/qp_extract_cipsi_data.py | 80 +++++++++++++------------------- 1 file changed, 31 insertions(+), 49 deletions(-) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 70935d73..200ab7aa 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -1,55 +1,37 @@ #!/usr/bin/env python3 -import re +import qp_json import sys -# Read output file -with open(sys.argv[1], 'r') as file: - output = file.read() +if len(sys.argv) == 1: + print(f"syntax: {sys.argv[0]} EZFIO_FILE") + +d = qp_json.load_all(sys.argv[1]) + +k = [ x for x in d.keys() ] +k.sort() + +print("# Energy PT2 PT2_err rPT2 rPT2_err exFCI\n") +for f in k: + try: + j = d[f]["fci"] + except: + continue + + print(f"# {f}") + for e in j: + + out = f" {e['n_det']:8d}" + + nstates = len(e["states"]) + for ee in e["states"]: + try: + exc_energy = ee['ex_energy'][0] + except: + exc_energy = 0. + out += f" {ee['energy']:16.8f} {ee['pt2']:e} {ee['pt2_err']:e} {ee['rpt2']:e} {ee['rpt2_err']:e} {exc_energy:16.8f}" + print(out) + + print("\n") -def extract_data(output): - lines = output.split("\n") - data = [] - - n_det = None - e = None - pt2 = None - err_pt2 = None - rpt2 = None - err_rpt2 = None - e_ex = None - - - reading = False - for iline, line in enumerate(lines): - if line.startswith("Summary at N_det"): - reading = False - - if not reading and line.startswith(" N_det "): - n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) - reading = True - - if reading: - if line.startswith(" E "): - e = float(re.search(r"E\s+=\s+(-?\d+\.\d+)", line).group(1)) - elif line.startswith(" PT2 "): - pt2 = float(re.search(r"PT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - err_pt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - elif line.startswith(" rPT2 "): - rpt2 = float(re.search(r"rPT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - err_rpt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - elif "minimum PT2 Extrapolated energy" in line: - e_ex_line = lines[iline+2] - e_ex = float(e_ex_line.split()[1]) - reading = False - new_data = " {:8d} {:16.8f} {:e} {:e} {:e} {:e} {:16.8f}".format(n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex) - data.append(new_data) - n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None - - return data - -data = extract_data(output) - -for item in data: - print(item) From 46e3faed3cdc90fa3c6a82bbd40655378b438ea7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 14:44:45 +0200 Subject: [PATCH 14/29] allow no basis set --- data/basis/none | 5 + ocaml/Input_ao_basis.ml | 8 +- ocaml/qp_create_ezfio.ml | 262 ++++++++++++----------- scripts/ezfio_interface/qp_edit_template | 36 ++-- src/ao_two_e_ints/EZFIO.cfg | 2 +- src/mo_basis/utils.irp.f | 5 +- 6 files changed, 161 insertions(+), 157 deletions(-) create mode 100644 data/basis/none diff --git a/data/basis/none b/data/basis/none new file mode 100644 index 00000000..df5d59f1 --- /dev/null +++ b/data/basis/none @@ -0,0 +1,5 @@ +$DATA + +HYDROGEN + +$END diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 95d37a7a..841089ea 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -247,8 +247,7 @@ end = struct let read () = - if (Ezfio.has_ao_basis_ao_basis ()) then - begin + try let result = { ao_basis = read_ao_basis (); ao_num = read_ao_num () ; @@ -267,9 +266,8 @@ end = struct |> MD5.to_string |> Ezfio.set_ao_basis_ao_md5 ; Some result - end - else - None + with + | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) ;; diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index 4583b118..8e452762 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -478,6 +478,7 @@ let run ?o b au c d m p cart xyz_file = let nmax = Nucl_number.get_max () in + let rec do_work (accu:(Atom.t*Nucl_number.t) list) (n:int) = function | [] -> accu | e::tail -> @@ -520,141 +521,144 @@ let run ?o b au c d m p cart xyz_file = in let long_basis = Long_basis.of_basis basis in let ao_num = List.length long_basis in - Ezfio.set_ao_basis_ao_num ao_num; - Ezfio.set_ao_basis_ao_basis b; - Ezfio.set_basis_basis b; - let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis - and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis - and ao_power= - let l = list_map (fun (x,_,_) -> x) long_basis in - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) - in - let ao_prim_num_max = List.fold_left (fun s x -> - if x > s then x - else s) 0 ao_prim_num - in - let gtos = - list_map (fun (_,x,_) -> x) long_basis - in - - let create_expo_coef ec = - let coefs = - begin match ec with - | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> - AO_coef.to_float coef) x.Gto.lc) gtos - | `Expos -> list_map (fun x-> - list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc) gtos - end + if ao_num > 0 then + begin + Ezfio.set_ao_basis_ao_num ao_num; + Ezfio.set_ao_basis_ao_basis b; + Ezfio.set_basis_basis b; + let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis + and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis + and ao_power= + let l = list_map (fun (x,_,_) -> x) long_basis in + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) in - let rec get_n n accu = function - | [] -> List.rev accu - | h::tail -> - let y = - begin match List.nth_opt h n with - | Some x -> x - | None -> 0. + let ao_prim_num_max = List.fold_left (fun s x -> + if x > s then x + else s) 0 ao_prim_num + in + let gtos = + list_map (fun (_,x,_) -> x) long_basis + in + + let create_expo_coef ec = + let coefs = + begin match ec with + | `Coefs -> list_map (fun x-> + list_map (fun (_,coef) -> + AO_coef.to_float coef) x.Gto.lc) gtos + | `Expos -> list_map (fun x-> + list_map (fun (prim,_) -> AO_expo.to_float + prim.GaussianPrimitive.expo) x.Gto.lc) gtos end - in - get_n n (y::accu) tail + in + let rec get_n n accu = function + | [] -> List.rev accu + | h::tail -> + let y = + begin match List.nth_opt h n with + | Some x -> x + | None -> 0. + end + in + get_n n (y::accu) tail + in + let rec build accu = function + | n when n=ao_prim_num_max -> accu + | n -> build ( accu @ (get_n n [] coefs) ) (n+1) + in + build [] 0 in - let rec build accu = function - | n when n=ao_prim_num_max -> accu - | n -> build ( accu @ (get_n n [] coefs) ) (n+1) - in - build [] 0 - in - let ao_coef = create_expo_coef `Coefs - and ao_expo = create_expo_coef `Expos - in - let () = - let shell_num = List.length basis in - let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = - list_map ( fun (g,_) -> g.Gto.lc ) basis - in - let ang_mom = - list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> - let x, _ = List.hd l in - Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int - ) lc - in - let expo = - list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc - |> List.concat - in - let coef = - list_map (fun l -> - list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l - ) lc - |> List.concat - in - let shell_prim_num = - list_map List.length lc - in - let shell_idx = - let rec make_list n accu = function - | 0 -> accu - | i -> make_list n (n :: accu) (i-1) + let ao_coef = create_expo_coef `Coefs + and ao_expo = create_expo_coef `Expos in - let rec aux count accu = function - | [] -> List.rev accu - | l::rest -> - let new_l = make_list count accu (List.length l) in - aux (count+1) new_l rest - in - aux 1 [] lc - in - let prim_num = List.length coef in - Ezfio.set_basis_typ "Gaussian"; - Ezfio.set_basis_shell_num shell_num; - Ezfio.set_basis_prim_num prim_num ; - Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); - Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; - Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; - Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] - ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) - ) ; - Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| nucl_num |] - ~data:( - list_map (fun (_,n) -> Nucl_number.to_int n) basis - |> List.fold_left (fun accu i -> - match accu with - | [] -> [(1,i)] - | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) - ) [] - |> List.rev - |> List.map fst - )) ; - Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:coef) ; - Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:expo) ; + let () = + let shell_num = List.length basis in + let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = + list_map ( fun (g,_) -> g.Gto.lc ) basis + in + let ang_mom = + list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> + let x, _ = List.hd l in + Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int + ) lc + in + let expo = + list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc + |> List.concat + in + let coef = + list_map (fun l -> + list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l + ) lc + |> List.concat + in + let shell_prim_num = + list_map List.length lc + in + let shell_idx = + let rec make_list n accu = function + | 0 -> accu + | i -> make_list n (n :: accu) (i-1) + in + let rec aux count accu = function + | [] -> List.rev accu + | l::rest -> + let new_l = make_list count accu (List.length l) in + aux (count+1) new_l rest + in + aux 1 [] lc + in + let prim_num = List.length coef in + Ezfio.set_basis_typ "Gaussian"; + Ezfio.set_basis_shell_num shell_num; + Ezfio.set_basis_prim_num prim_num ; + Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); + Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; + Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; + Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] + ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) + ) ; + Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| nucl_num |] + ~data:( + list_map (fun (_,n) -> Nucl_number.to_int n) basis + |> List.fold_left (fun accu i -> + match accu with + | [] -> [(1,i)] + | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) + ) [] + |> List.rev + |> List.map fst + )) ; + Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:coef) ; + Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:expo) ; - Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; - Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; - Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; - Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; - Ezfio.set_ao_basis_ao_cartesian(cart); - in - match Input.Ao_basis.read () with - | None -> failwith "Error in basis" - | Some x -> Input.Ao_basis.write x + Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; + Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; + Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; + Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; + Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; + Ezfio.set_ao_basis_ao_cartesian(cart); + in + match Input.Ao_basis.read () with + | None -> failwith "Error in basis" + | Some x -> Input.Ao_basis.write x + end in let () = try write_file () with @@ -781,7 +785,7 @@ If a file with the same name as the basis set exists, this file will be read. O run ?o:output basis au charge dummy multiplicity pseudo cart xyz_filename ) with - | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt +(* | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt *) | Command_line.Error txt -> Printf.eprintf "Command line error: %s\n%!" txt diff --git a/scripts/ezfio_interface/qp_edit_template b/scripts/ezfio_interface/qp_edit_template index 4218456d..fe718a50 100644 --- a/scripts/ezfio_interface/qp_edit_template +++ b/scripts/ezfio_interface/qp_edit_template @@ -172,25 +172,23 @@ let run check_only ?ndet ?state ezfio_filename = (* Reorder basis set *) begin - let aos = - match Input.Ao_basis.read() with - | Some x -> x - | _ -> assert false - in - let ordering = Input.Ao_basis.ordering aos in - let test = Array.copy ordering in - Array.sort compare test ; - if test <> ordering then - begin - Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; - let new_aos = Input.Ao_basis.reorder aos in - Input.Ao_basis.write new_aos; - match Input.Mo_basis.read() with - | None -> () - | Some mos -> - let new_mos = Input.Mo_basis.reorder mos ordering in - Input.Mo_basis.write new_mos - end + match Input.Ao_basis.read() with + | Some aos -> + let ordering = Input.Ao_basis.ordering aos in + let test = Array.copy ordering in + Array.sort compare test ; + if test <> ordering then + begin + Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; + let new_aos = Input.Ao_basis.reorder aos in + Input.Ao_basis.write new_aos; + match Input.Mo_basis.read() with + | None -> () + | Some mos -> + let new_mos = Input.Mo_basis.reorder mos ordering in + Input.Mo_basis.write new_mos + end + | _ -> () end; begin diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index caed4698..4ab080ec 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -22,4 +22,4 @@ ezfio_name: direct type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: True +default: False diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f index 12c6c79d..5f664c41 100644 --- a/src/mo_basis/utils.irp.f +++ b/src/mo_basis/utils.irp.f @@ -3,7 +3,6 @@ subroutine save_mos double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(mo_num) call ezfio_set_mo_basis_mo_label(mo_label) call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -27,7 +26,7 @@ subroutine save_mos_no_occ double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) !call ezfio_set_mo_basis_mo_num(mo_num) !call ezfio_set_mo_basis_mo_label(mo_label) !call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -48,7 +47,7 @@ subroutine save_mos_truncated(n) double precision, allocatable :: buffer(:,:) integer :: i,j,n - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(n) call ezfio_set_mo_basis_mo_label(mo_label) From f0b71bc2b0ec38dc8d151f7bc9c410b781e28b03 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 18:06:50 +0200 Subject: [PATCH 15/29] Add libtrexio in configure --- configure | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/configure b/configure index d3377093..4dd753ff 100755 --- a/configure +++ b/configure @@ -9,6 +9,8 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX +TREXIO_VERSION=2.3.1 + # Force GCC instead of ICC for dependencies export CC=gcc @@ -189,7 +191,7 @@ if [[ "${PACKAGES}.x" != ".x" ]] ; then fi if [[ ${PACKAGES} = all ]] ; then - PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats" + PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats trexio" fi @@ -203,6 +205,33 @@ for PACKAGE in ${PACKAGES} ; do mv ninja "\${QP_ROOT}"/bin/ EOF + elif [[ ${PACKAGE} = trexio-nohdf5 ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} --without-hdf5 + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files + tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz + mv ninja "\${QP_ROOT}"/bin/ +EOF + elif [[ ${PACKAGE} = trexio ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files +EOF + elif [[ ${PACKAGE} = gmp ]] ; then @@ -338,6 +367,12 @@ if [[ ${ZEROMQ} = $(not_found) ]] ; then fail fi +TREXIO=$(find_lib -ltrexio) +if [[ ${TREXIO} = $(not_found) ]] ; then + error "TREXIO (trexio,trexio-nohdf5) is not installed. If you don't have HDF5, use trexio-nohdf5" + fail +fi + F77ZMQ=$(find_lib -lzmq -lf77zmq -lpthread) if [[ ${F77ZMQ} = $(not_found) ]] ; then error "Fortran binding of ZeroMQ (f77zmq) is not installed." From 01b70ffb17389485d32069d2f53041998c94763d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 11 May 2023 22:45:18 +0200 Subject: [PATCH 16/29] Removed penalty method from TCSCF: problem with normal ordering --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index ea9cacff..3e89bbe2 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -304,22 +304,23 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Penalty method ! -------------- - if (s2_eig) then - h_p = s_ - do k=1,shift2 - h_p(k,k) = h_p(k,k) - expected_s2 - enddo - if (only_expected_s2) then - alpha = 0.1d0 - h_p = h + alpha*h_p - else - alpha = 0.0001d0 - h_p = h + alpha*h_p - endif - else +! if (s2_eig) then +! h_p = s_ +! do k=1,shift2 +! h_p(k,k) = h_p(k,k) - expected_s2 +! enddo +! if (only_expected_s2) then +! alpha = 0.1d0 +! h_p = h + alpha*h_p +! else +! alpha = 0.0001d0 +! h_p = h + alpha*h_p +! endif +! else h_p = h alpha = 0.d0 - endif +! endif + ! Diagonalize h y = lambda y ! --------------------------- From 49598822938da0ac9fbe9334f6f1d61d18de7f93 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 11 May 2023 22:48:48 +0200 Subject: [PATCH 17/29] Added TREXIO module --- src/trexio/EZFIO.cfg | 54 ++ src/trexio/README.rst | 6 + src/trexio/export_trexio.irp.f | 7 + src/trexio/export_trexio_routines.irp.f | 604 ++++++++++++++++++++ src/trexio/import_trexio_determinants.irp.f | 79 +++ src/trexio/import_trexio_integrals.irp.f | 146 +++++ src/trexio/qp_import_trexio.py | 415 ++++++++++++++ src/trexio/trexio_file.irp.f | 20 + src/trexio/trexio_module.F90 | 1 + 9 files changed, 1332 insertions(+) create mode 100644 src/trexio/EZFIO.cfg create mode 100644 src/trexio/README.rst create mode 100644 src/trexio/export_trexio.irp.f create mode 100644 src/trexio/export_trexio_routines.irp.f create mode 100644 src/trexio/import_trexio_determinants.irp.f create mode 100644 src/trexio/import_trexio_integrals.irp.f create mode 100755 src/trexio/qp_import_trexio.py create mode 100644 src/trexio/trexio_file.irp.f create mode 100644 src/trexio/trexio_module.F90 diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg new file mode 100644 index 00000000..8606e908 --- /dev/null +++ b/src/trexio/EZFIO.cfg @@ -0,0 +1,54 @@ +[backend] +type: integer +doc: Back-end used in TREXIO. 0: HDF5, 1:Text +interface: ezfio, ocaml, provider +default: 0 + +[trexio_file] +type: character*(256) +doc: Name of the exported TREXIO file +interface: ezfio, ocaml, provider +default: None + +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + +[export_ao_one_e_ints] +type: logical +doc: If True, export one-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints] +type: logical +doc: If True, export two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints] +type: logical +doc: If True, export two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/README.rst b/src/trexio/README.rst new file mode 100644 index 00000000..7d7304c6 --- /dev/null +++ b/src/trexio/README.rst @@ -0,0 +1,6 @@ +====== +trexio +====== + +Module for handling TREXIO files. +See https://github.com/trex-coe/trexio diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f new file mode 100644 index 00000000..3ae0dcb4 --- /dev/null +++ b/src/trexio/export_trexio.irp.f @@ -0,0 +1,7 @@ +program export_trexio_prog + implicit none + read_wf = .True. + SOFT_TOUCH read_wf + call export_trexio +end + diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f new file mode 100644 index 00000000..d69e7a70 --- /dev/null +++ b/src/trexio/export_trexio_routines.irp.f @@ -0,0 +1,604 @@ +subroutine export_trexio + use trexio + implicit none + BEGIN_DOC + ! Exports the wave function in TREXIO format + END_DOC + + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + double precision, allocatable :: factor(:) + + print *, 'TREXIO file : '//trim(trexio_filename) + print *, '' + + call system('cp '//trim(trexio_filename)//' '//trim(trexio_filename)//'.bak') + if (backend == 0) then + f = trexio_open(trexio_filename, 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f = trexio_open(trexio_filename, 'u', TREXIO_TEXT, rc) + endif + if (f == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + call ezfio_set_trexio_trexio_file(trexio_filename) + +! ------------------------------------------------------------------------------ + +! Electrons +! --------- + + print *, 'Electrons' + + rc = trexio_write_electron_up_num(f, elec_alpha_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_electron_dn_num(f, elec_beta_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Nuclei +! ------ + + print *, 'Nuclei' + + rc = trexio_write_nucleus_num(f, nucl_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_charge(f, nucl_charge) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_coord(f, nucl_coord_transp) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_label(f, nucl_label, 32) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_repulsion(f, nuclear_repulsion) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Pseudo-potentials +! ----------------- + + if (do_pseudo) then + + print *, 'ECP' + integer :: num + + num = 0 + do k=1,pseudo_klocmax + do i=1,nucl_num + if (pseudo_dz_k(i,k) /= 0.d0) then + num = num+1 + end if + end do + end do + + do l=0,pseudo_lmax + do k=1,pseudo_kmax + do i=1,nucl_num + if (pseudo_dz_kl(i,k,l) /= 0.d0) then + num = num+1 + end if + end do + end do + end do + + integer, allocatable :: ang_mom(:), nucleus_index(:), power(:), lmax(:) + double precision, allocatable :: exponent(:), coefficient(:) + + allocate(ang_mom(num), nucleus_index(num), exponent(num), coefficient(num), power(num), & + lmax(nucl_num) ) + + do i=1,nucl_num + lmax(i) = -1 + do l=0,pseudo_lmax + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + lmax(i) = max(lmax(i), l) + end if + end do + end do + end do + + j = 0 + do i=1,nucl_num + do k=1,pseudo_klocmax + if (pseudo_dz_k_transp(k,i) /= 0.d0) then + j = j+1 + ang_mom(j) = lmax(i)+1 + nucleus_index(j) = i + exponent(j) = pseudo_dz_k_transp(k,i) + coefficient(j) = pseudo_v_k_transp(k,i) + power(j) = pseudo_n_k_transp(k,i) + end if + end do + + do l=0,lmax(i) + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + j = j+1 + ang_mom(j) = l + nucleus_index(j) = i + exponent(j) = pseudo_dz_kl_transp(k,l,i) + coefficient(j) = pseudo_v_kl_transp(k,l,i) + power(j) = pseudo_n_kl_transp(k,l,i) + end if + end do + end do + end do + + + lmax(:) = lmax(:)+1 + rc = trexio_write_ecp_max_ang_mom_plus_1(f, lmax) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_z_core(f, int(nucl_charge_remove)) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_num(f, num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_ang_mom(f, ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_nucleus_index(f, nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_exponent(f, exponent) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_coefficient(f, coefficient) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_power(f, power) + call trexio_assert(rc, TREXIO_SUCCESS) + + endif + + +! Basis +! ----- + + print *, 'Basis' + + + rc = trexio_write_basis_type(f, 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_prim_num(f, prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_num(f, shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_nucleus_index(f, basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_ang_mom(f, shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(shell_num)) + if (ao_normalized) then + factor(1:shell_num) = shell_normalization_factor(1:shell_num) + else + factor(1:shell_num) = 1.d0 + endif + rc = trexio_write_basis_shell_factor(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + + deallocate(factor) + + rc = trexio_write_basis_shell_index(f, shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_exponent(f, prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_coefficient(f, prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + + +! Atomic orbitals +! --------------- + + print *, 'AOs' + + rc = trexio_write_ao_num(f, ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_cartesian(f, 1) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_shell(f, ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) + + integer :: i, pow0(3), powA(3), j, k, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 + + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + +! One-e AO integrals +! ------------------ + + if (export_ao_one_e_ints) then + print *, 'AO one-e integrals' + + rc = trexio_write_ao_1e_int_overlap(f,ao_overlap) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_kinetic(f,ao_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_potential_n_e(f,ao_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_ao_1e_int_ecp(f, ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_ao_1e_int_core_hamiltonian(f,ao_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e AO integrals +! ------------------ + + if (export_ao_two_e_ints) then + print *, 'AO two-e integrals' + PROVIDE ao_two_e_integrals_in_map + + integer(8), parameter :: BUFSIZE=100000_8 + double precision :: eri_buffer(BUFSIZE), integral + integer(4) :: eri_index(4,BUFSIZE) + integer(8) :: icount, offset + + double precision, external :: get_ao_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,ao_num + do k=1,ao_num + do j=l,ao_num + do i=k,ao_num + if (i==j .and. k= 0_8) then + rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e AO integrals - Cholesky +! ----------------------------- + + integer(4) :: chol_index(3,BUFSIZE) + double precision :: chol_buffer(BUFSIZE) + + if (export_ao_two_e_ints_cholesky) then + print *, 'AO two-e integrals Cholesky' + + rc = trexio_write_ao_2e_int_eri_cholesky_num(f, cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,ao_num + do i=1,ao_num + integral = cholesky_ao(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + + +! Molecular orbitals +! ------------------ + + print *, 'MOs' + + rc = trexio_write_mo_type(f, mo_label, len(trim(mo_label))) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_num(f, mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_coefficient(f, mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f, fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f, mo_class, len(mo_class(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + +! One-e MO integrals +! ------------------ + + if (export_mo_one_e_ints) then + print *, 'MO one-e integrals' + + rc = trexio_write_mo_1e_int_kinetic(f,mo_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_1e_int_potential_n_e(f,mo_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_mo_1e_int_ecp(f,mo_pseudo_integrals_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_1e_int_core_hamiltonian(f,mo_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e MO integrals +! ------------------ + + if (export_mo_two_e_ints) then + print *, 'MO two-e integrals' + PROVIDE mo_two_e_integrals_in_map + + double precision, external :: mo_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=l,mo_num + do i=k,mo_num + if (i==j .and. k 0_8) then + rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e MO integrals - Cholesky +! ----------------------------- + + if (export_mo_two_e_ints_cholesky) then + print *, 'MO two-e integrals Cholesky' + + rc = trexio_write_mo_2e_int_eri_cholesky_num(f, cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,mo_num + do i=1,mo_num + integral = cholesky_mo(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! One-e RDM +! --------- + + rc = trexio_write_rdm_1e(f,one_e_dm_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_up(f,one_e_dm_mo_alpha_average) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_dn(f,one_e_dm_mo_beta_average) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Two-e RDM +! --------- + + if (export_rdm) then + PROVIDE two_e_dm_mo + print *, 'Two-e RDM' + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=1,mo_num + do i=1,mo_num + integral = two_e_dm_mo(i,j,k,l) + if (integral == 0.d0) cycle + icount += 1_8 + eri_buffer(icount) = integral + eri_index(1,icount) = i + eri_index(2,icount) = j + eri_index(3,icount) = k + eri_index(4,icount) = l + if (icount == BUFSIZE) then + rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + end do + + if (icount >= 0_8) then + rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! ------------------------------------------------------------------------------ + + ! Determinants + ! ------------ + + integer*8, allocatable :: det_buffer(:,:,:) + double precision, allocatable :: coef_buffer(:,:) + integer :: nint + +! rc = trexio_read_determinant_int64_num(f, nint) +! call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) + + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f, n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_set_state (f, 0) + call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,n_det + icount += 1_8 + det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) + coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) + if (icount == BUFSIZE) then + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + do i=1,N_states + rc = trexio_set_state (f, i-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) + end do + rc = trexio_set_state (f, 0) + offset += icount + icount = 0_8 + end if + end do + + if (icount >= 0_8) then + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + do i=1,N_states + rc = trexio_set_state (f, i-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) + end do + rc = trexio_set_state (f, 0) + end if + + deallocate ( det_buffer, coef_buffer ) + + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) + +end + + +! -*- mode: f90 -*- diff --git a/src/trexio/import_trexio_determinants.irp.f b/src/trexio/import_trexio_determinants.irp.f new file mode 100644 index 00000000..1759bb94 --- /dev/null +++ b/src/trexio/import_trexio_determinants.irp.f @@ -0,0 +1,79 @@ +program import_determinants_ao + call run +end + +subroutine run + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import determinants from TREXIO + END_DOC + + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer :: m + + double precision, allocatable :: coef_buffer(:,:) + integer*8 , allocatable :: det_buffer(:,:,:) + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + + + ! Determinants + ! ------------ + + integer :: nint, nstates + integer :: bufsize + + rc = trexio_read_state_num(f, nstates) + call trexio_assert(rc, TREXIO_SUCCESS) + +! rc = trexio_read_determinant_int64_num(f, nint) +! call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + + integer*8 :: offset, icount + + rc = trexio_read_determinant_num(f, bufsize) + call trexio_assert(rc, TREXIO_SUCCESS) + print *, 'N_det = ', bufsize + + allocate ( det_buffer(nint, 2, bufsize), coef_buffer(bufsize, n_states) ) + + + offset = 0_8 + icount = bufsize + + rc = trexio_read_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det: ', bufsize, icount + stop -1 + endif + + do m=1,nstates + rc = trexio_set_state(f, m-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_read_determinant_coefficient(f, offset, icount, coef_buffer(1,m)) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det for state', m, ':', icount, bufsize + stop -1 + endif + enddo + + call save_wavefunction_general(bufsize,nstates,det_buffer,size(coef_buffer,1),coef_buffer) + + +end diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f new file mode 100644 index 00000000..9f9ad9d6 --- /dev/null +++ b/src/trexio/import_trexio_integrals.irp.f @@ -0,0 +1,146 @@ +program import_integrals_ao + use trexio + implicit none + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + call run(f) + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) +end + +subroutine run(f) + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import integrals from TREXIO + END_DOC + + integer(trexio_t), intent(in) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer ::i,j,k,l + integer(8) :: m, n_integrals + double precision :: integral + + integer(key_kind), allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_values(:) + + + double precision, allocatable :: A(:,:) + double precision, allocatable :: V(:) + integer , allocatable :: Vi(:,:) + double precision :: s + + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then + rc = trexio_read_nucleus_repulsion(f, s) + call trexio_assert(rc, TREXIO_SUCCESS) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here, rc + print *, 'Error reading nuclear repulsion' + stop -1 + endif + call ezfio_set_nuclei_nuclear_repulsion(s) + call ezfio_set_nuclei_io_nuclear_repulsion('Read') + endif + + ! AO integrals + ! ------------ + + allocate(A(ao_num, ao_num)) + + + if (trexio_has_ao_1e_int_overlap(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_overlap(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO overlap' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap('Read') + endif + + if (trexio_has_ao_1e_int_kinetic(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_kinetic(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO kinetic integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic('Read') + endif + +! if (trexio_has_ao_1e_int_ecp(f) == TREXIO_SUCCESS) then +! rc = trexio_read_ao_1e_int_ecp(f, A) +! if (rc /= TREXIO_SUCCESS) then +! print *, irp_here +! print *, 'Error reading AO ECP local integrals' +! stop -1 +! endif +! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) +! call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo('Read') +! endif + + if (trexio_has_ao_1e_int_potential_n_e(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_potential_n_e(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO potential N-e integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e('Read') + endif + + deallocate(A) + + ! AO 2e integrals + ! --------------- + PROVIDE ao_integrals_map + + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + +end diff --git a/src/trexio/qp_import_trexio.py b/src/trexio/qp_import_trexio.py new file mode 100755 index 00000000..de8d1269 --- /dev/null +++ b/src/trexio/qp_import_trexio.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 +""" +convert TREXIO file to EZFIO + +Usage: + qp_import_trexio [-o EZFIO_DIR] FILE + +Options: + -o --output=EZFIO_DIR Produced directory + by default is FILE.ezfio + +""" + +import sys +import os +import trexio +import numpy as np +from functools import reduce +from ezfio import ezfio +from docopt import docopt + + +try: + QP_ROOT = os.environ["QP_ROOT"] + QP_EZFIO = os.environ["QP_EZFIO"] +except KeyError: + print("Error: QP_ROOT environment variable not found.") + sys.exit(1) +else: + sys.path = [QP_EZFIO + "/Python", + QP_ROOT + "/install/resultsFile", + QP_ROOT + "/install", + QP_ROOT + "/scripts"] + sys.path + + +def generate_xyz(l): + + def create_z(x,y,z): + return (x, y, l-(x+y)) + + def create_y(accu,x,y,z): + if y == 0: + result = [create_z(x,y,z)] + accu + else: + result = create_y([create_z(x,y,z)] + accu , x, y-1, z) + return result + + def create_x(accu,x,y,z): + if x == 0: + result = create_y([], x,y,z) + accu + else: + xnew = x-1 + ynew = l-xnew + result = create_x(create_y([],x,y,z) + accu , xnew, ynew, z) + return result + + result = create_x([], l, 0, 0) + result.reverse() + return result + + + +def write_ezfio(trexio_filename, filename): + + try: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_TEXT) + except: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_HDF5) + + ezfio.set_file(filename) + ezfio.set_trexio_trexio_file(trexio_filename) + + print("Nuclei\t\t...\t", end=' ') + + charge = [0.] + if trexio.has_nucleus(trexio_file): + charge = trexio.read_nucleus_charge(trexio_file) + ezfio.set_nuclei_nucl_num(len(charge)) + ezfio.set_nuclei_nucl_charge(charge) + + coord = trexio.read_nucleus_coord(trexio_file) + coord = np.transpose(coord) + ezfio.set_nuclei_nucl_coord(coord) + + label = trexio.read_nucleus_label(trexio_file) + nucl_num = trexio.read_nucleus_num(trexio_file) + + # Transformt H1 into H + import re + p = re.compile(r'(\d*)$') + label = [p.sub("", x).capitalize() for x in label] + ezfio.set_nuclei_nucl_label(label) + + else: + ezfio.set_nuclei_nucl_num(1) + ezfio.set_nuclei_nucl_charge([0.]) + ezfio.set_nuclei_nucl_coord([0.,0.,0.]) + ezfio.set_nuclei_nucl_label(["X"]) + + print("OK") + + + print("Electrons\t...\t", end=' ') + + try: + num_beta = trexio.read_electron_dn_num(trexio_file) + except: + num_beta = sum(charge)//2 + + try: + num_alpha = trexio.read_electron_up_num(trexio_file) + except: + num_alpha = sum(charge) - num_beta + + if num_alpha == 0: + print("\n\nError: There are zero electrons in the TREXIO file.\n\n") + sys.exit(1) + ezfio.set_electrons_elec_alpha_num(num_alpha) + ezfio.set_electrons_elec_beta_num(num_beta) + + print("OK") + + print("Basis\t\t...\t", end=' ') + + shell_num = 0 + try: + basis_type = trexio.read_basis_type(trexio_file) + + if basis_type.lower() not in ["gaussian", "slater"]: + raise TypeError + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) + + print("OK") + except: + print("None") + ezfio.set_ao_basis_ao_cartesian(True) + + print("AOS\t\t...\t", end=' ') + + try: + cartesian = trexio.read_ao_cartesian(trexio_file) + except: + cartesian = True + + if not cartesian: + raise TypeError('Only cartesian TREXIO files can be converted') + + ao_num = trexio.read_ao_num(trexio_file) + ezfio.set_ao_basis_ao_num(ao_num) + + if shell_num > 0: + ao_shell = trexio.read_ao_shell(trexio_file) + at = [ nucl_index[i]+1 for i in ao_shell ] + ezfio.set_ao_basis_ao_nucl(at) + + num_prim0 = [ 0 for i in range(shell_num) ] + for i in shell_index: + num_prim0[i] += 1 + + coef = {} + expo = {} + for i,c in enumerate(coefficient): + idx = shell_index[i] + if idx in coef: + coef[idx].append(c) + expo[idx].append(exponent[i]) + else: + coef[idx] = [c] + expo[idx] = [exponent[i]] + + coefficient = [] + exponent = [] + power_x = [] + power_y = [] + power_z = [] + num_prim = [] + + for i in range(shell_num): + for x,y,z in generate_xyz(ang_mom[i]): + power_x.append(x) + power_y.append(y) + power_z.append(z) + coefficient.append(coef[i]) + exponent.append(expo[i]) + num_prim.append(num_prim0[i]) + + assert (len(coefficient) == ao_num) + ezfio.set_ao_basis_ao_power(power_x + power_y + power_z) + ezfio.set_ao_basis_ao_prim_num(num_prim) + + prim_num_max = max( [ len(x) for x in coefficient ] ) + + for i in range(ao_num): + coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] + exponent [i] += [0. for j in range(len(exponent[i]), prim_num_max)] + + coefficient = reduce(lambda x, y: x + y, coefficient, []) + exponent = reduce(lambda x, y: x + y, exponent , []) + + coef = [] + expo = [] + for i in range(prim_num_max): + for j in range(i, len(coefficient), prim_num_max): + coef.append(coefficient[j]) + expo.append(exponent[j]) + +# ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) + ezfio.set_ao_basis_ao_coef(coef) + ezfio.set_ao_basis_ao_expo(expo) + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + + print("OK") + + + # _ + # |\/| _ _ |_) _. _ o _ + # | | (_) _> |_) (_| _> | _> + # + + print("MOS\t\t...\t", end=' ') + + labels = { "Canonical" : "Canonical", + "RHF" : "Canonical", + "BOYS" : "Localized", + "ROHF" : "Canonical", + "UHF" : "Canonical", + "Natural": "Natural" } + try: + label = labels[trexio.read_mo_type(trexio_file)] + except: + label = "None" + ezfio.set_mo_basis_mo_label(label) + + try: + clss = trexio.read_mo_class(trexio_file) + core = [ i for i in clss if i.lower() == "core" ] + inactive = [ i for i in clss if i.lower() == "inactive" ] + active = [ i for i in clss if i.lower() == "active" ] + virtual = [ i for i in clss if i.lower() == "virtual" ] + deleted = [ i for i in clss if i.lower() == "deleted" ] + except trexio.Error: + pass + + try: + mo_num = trexio.read_mo_num(trexio_file) + ezfio.set_mo_basis_mo_num(mo_num) + + MoMatrix = trexio.read_mo_coefficient(trexio_file) + ezfio.set_mo_basis_mo_coef(MoMatrix) + + mo_occ = [ 0. for i in range(mo_num) ] + for i in range(num_alpha): + mo_occ[i] += 1. + for i in range(num_beta): + mo_occ[i] += 1. + ezfio.set_mo_basis_mo_occ(mo_occ) + except: + pass + + print("OK") + + + print("Pseudos\t\t...\t", end=' ') + + ezfio.set_pseudo_do_pseudo(False) + + if trexio.has_ecp_ang_mom(trexio_file): + ezfio.set_pseudo_do_pseudo(True) + max_ang_mom_plus_1 = trexio.read_ecp_max_ang_mom_plus_1(trexio_file) + z_core = trexio.read_ecp_z_core(trexio_file) + ang_mom = trexio.read_ecp_ang_mom(trexio_file) + nucleus_index = trexio.read_ecp_nucleus_index(trexio_file) + exponent = trexio.read_ecp_exponent(trexio_file) + coefficient = trexio.read_ecp_coefficient(trexio_file) + power = trexio.read_ecp_power(trexio_file) + + lmax = max( max_ang_mom_plus_1 ) - 1 + ezfio.set_pseudo_pseudo_lmax(lmax) + ezfio.set_pseudo_nucl_charge_remove(z_core) + + prev_center = None + ecp = {} + for i in range(len(ang_mom)): + center = nucleus_index[i] + if center != prev_center: + ecp[center] = { "lmax": max_ang_mom_plus_1[center], + "zcore": z_core[center], + "contr": {} } + for j in range(max_ang_mom_plus_1[center]+1): + ecp[center]["contr"][j] = [] + + ecp[center]["contr"][ang_mom[i]].append( (coefficient[i], power[i], exponent[i]) ) + prev_center = center + + ecp_loc = {} + ecp_nl = {} + kmax = 0 + klocmax = 0 + for center in ecp: + ecp_nl [center] = {} + for k in ecp[center]["contr"]: + if k == ecp[center]["lmax"]: + ecp_loc[center] = ecp[center]["contr"][k] + klocmax = max(len(ecp_loc[center]), klocmax) + else: + ecp_nl [center][k] = ecp[center]["contr"][k] + kmax = max(len(ecp_nl [center][k]), kmax) + + ezfio.set_pseudo_pseudo_klocmax(klocmax) + ezfio.set_pseudo_pseudo_kmax(kmax) + + pseudo_n_k = [[0 for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_v_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_dz_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_n_kl = [[[0 for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_v_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_dz_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + for center in ecp_loc: + for k in range( len(ecp_loc[center]) ): + v, n, dz = ecp_loc[center][k] + pseudo_n_k[k][center] = n + pseudo_v_k[k][center] = v + pseudo_dz_k[k][center] = dz + + ezfio.set_pseudo_pseudo_n_k(pseudo_n_k) + ezfio.set_pseudo_pseudo_v_k(pseudo_v_k) + ezfio.set_pseudo_pseudo_dz_k(pseudo_dz_k) + + for center in ecp_nl: + for l in range( len(ecp_nl[center]) ): + for k in range( len(ecp_nl[center][l]) ): + v, n, dz = ecp_nl[center][l][k] + pseudo_n_kl[l][k][center] = n + pseudo_v_kl[l][k][center] = v + pseudo_dz_kl[l][k][center] = dz + + ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) + ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) + ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + + + print("OK") + + + + +def get_full_path(file_path): + file_path = os.path.expanduser(file_path) + file_path = os.path.expandvars(file_path) + return file_path + + +if __name__ == '__main__': + ARGUMENTS = docopt(__doc__) + + FILE = get_full_path(ARGUMENTS['FILE']) + trexio_filename = FILE + + if ARGUMENTS["--output"]: + EZFIO_FILE = get_full_path(ARGUMENTS["--output"]) + else: + EZFIO_FILE = "{0}.ezfio".format(FILE) + + write_ezfio(trexio_filename, EZFIO_FILE) + sys.stdout.flush() + diff --git a/src/trexio/trexio_file.irp.f b/src/trexio/trexio_file.irp.f new file mode 100644 index 00000000..c9897748 --- /dev/null +++ b/src/trexio/trexio_file.irp.f @@ -0,0 +1,20 @@ +BEGIN_PROVIDER [ character*(1024), trexio_filename ] + implicit none + BEGIN_DOC + ! Name of the TREXIO file + END_DOC + character*(1024) :: prefix + + trexio_filename = trexio_file + + if (trexio_file == 'None') then + prefix = trim(ezfio_work_dir)//trim(ezfio_filename) + if (backend == 0) then + trexio_filename = trim(prefix)//'.h5' + else if (backend == 1) then + trexio_filename = trim(prefix) + endif + endif +END_PROVIDER + + diff --git a/src/trexio/trexio_module.F90 b/src/trexio/trexio_module.F90 new file mode 100644 index 00000000..acd08492 --- /dev/null +++ b/src/trexio/trexio_module.F90 @@ -0,0 +1 @@ +#include "trexio_f.f90" From a2627e79255f3b490beca663ae1f1464e76ab96d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:04:45 +0200 Subject: [PATCH 18/29] Introduced TREXIO in QP --- configure | 2 +- scripts/compilation/qp_create_ninja | 2 +- src/trexio/export_trexio_routines.irp.f | 223 +++++++++++++----------- src/two_body_rdm/two_e_dm_mo.irp.f | 1 - 4 files changed, 123 insertions(+), 105 deletions(-) diff --git a/configure b/configure index 4dd753ff..66bc9419 100755 --- a/configure +++ b/configure @@ -9,7 +9,7 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX -TREXIO_VERSION=2.3.1 +TREXIO_VERSION=2.3.2 # Force GCC instead of ICC for dependencies export CC=gcc diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 27b34901..606fd0f6 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -38,7 +38,7 @@ def comp_path(path): from qp_path import QP_ROOT, QP_SRC, QP_EZFIO -LIB = " -lz" +LIB = " -lz -ltrexio" EZFIO_LIB = join("$QP_ROOT", "lib", "libezfio_irp.a") ZMQ_LIB = join("$QP_ROOT", "lib", "libf77zmq.a") + " " + join("$QP_ROOT", "lib", "libzmq.a") + " -lstdc++ -lrt -ldl" ROOT_BUILD_NINJA = join("$QP_ROOT", "config", "build.ninja") diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index d69e7a70..c55ddc5e 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -5,24 +5,35 @@ subroutine export_trexio ! Exports the wave function in TREXIO format END_DOC - integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc + integer :: k double precision, allocatable :: factor(:) + character*(256) :: filenames(N_states) - print *, 'TREXIO file : '//trim(trexio_filename) + filenames(1) = trexio_filename + do k=2,N_states + write(filenames(k),'(A,I3.3)') trim(trexio_filename)//'.', k-1 + enddo + + do k=1,N_states + print *, 'TREXIO file : ', trim(filenames(k)) + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + enddo print *, '' - call system('cp '//trim(trexio_filename)//' '//trim(trexio_filename)//'.bak') - if (backend == 0) then - f = trexio_open(trexio_filename, 'u', TREXIO_HDF5, rc) - else if (backend == 1) then - f = trexio_open(trexio_filename, 'u', TREXIO_TEXT, rc) - endif - if (f == 0_8) then - print *, 'Unable to open TREXIO file for writing' - print *, 'rc = ', rc - stop -1 - endif + do k=1,N_states + if (backend == 0) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + endif + if (f(k) == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + enddo call ezfio_set_trexio_trexio_file(trexio_filename) ! ------------------------------------------------------------------------------ @@ -32,10 +43,10 @@ subroutine export_trexio print *, 'Electrons' - rc = trexio_write_electron_up_num(f, elec_alpha_num) + rc = trexio_write_electron_up_num(f(1), elec_alpha_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_electron_dn_num(f, elec_beta_num) + rc = trexio_write_electron_dn_num(f(1), elec_beta_num) call trexio_assert(rc, TREXIO_SUCCESS) @@ -44,19 +55,19 @@ subroutine export_trexio print *, 'Nuclei' - rc = trexio_write_nucleus_num(f, nucl_num) + rc = trexio_write_nucleus_num(f(1), nucl_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_charge(f, nucl_charge) + rc = trexio_write_nucleus_charge(f(1), nucl_charge) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_coord(f, nucl_coord_transp) + rc = trexio_write_nucleus_coord(f(1), nucl_coord_transp) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_label(f, nucl_label, 32) + rc = trexio_write_nucleus_label(f(1), nucl_label, 32) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_repulsion(f, nuclear_repulsion) + rc = trexio_write_nucleus_repulsion(f(1), nuclear_repulsion) call trexio_assert(rc, TREXIO_SUCCESS) @@ -133,28 +144,28 @@ subroutine export_trexio lmax(:) = lmax(:)+1 - rc = trexio_write_ecp_max_ang_mom_plus_1(f, lmax) + rc = trexio_write_ecp_max_ang_mom_plus_1(f(1), lmax) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_z_core(f, int(nucl_charge_remove)) + rc = trexio_write_ecp_z_core(f(1), int(nucl_charge_remove)) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_num(f, num) + rc = trexio_write_ecp_num(f(1), num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_ang_mom(f, ang_mom) + rc = trexio_write_ecp_ang_mom(f(1), ang_mom) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_nucleus_index(f, nucleus_index) + rc = trexio_write_ecp_nucleus_index(f(1), nucleus_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_exponent(f, exponent) + rc = trexio_write_ecp_exponent(f(1), exponent) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_coefficient(f, coefficient) + rc = trexio_write_ecp_coefficient(f(1), coefficient) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_power(f, power) + rc = trexio_write_ecp_power(f(1), power) call trexio_assert(rc, TREXIO_SUCCESS) endif @@ -166,19 +177,19 @@ subroutine export_trexio print *, 'Basis' - rc = trexio_write_basis_type(f, 'Gaussian', len('Gaussian')) + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f, prim_num) + rc = trexio_write_basis_prim_num(f(1), prim_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f, shell_num) + rc = trexio_write_basis_shell_num(f(1), shell_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f, basis_nucleus_index) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f, shell_ang_mom) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) call trexio_assert(rc, TREXIO_SUCCESS) allocate(factor(shell_num)) @@ -187,18 +198,18 @@ subroutine export_trexio else factor(1:shell_num) = 1.d0 endif - rc = trexio_write_basis_shell_factor(f, factor) + rc = trexio_write_basis_shell_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) - rc = trexio_write_basis_shell_index(f, shell_index) + rc = trexio_write_basis_shell_index(f(1), shell_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f, prim_expo) + rc = trexio_write_basis_exponent(f(1), prim_expo) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f, prim_coef) + rc = trexio_write_basis_coefficient(f(1), prim_coef) call trexio_assert(rc, TREXIO_SUCCESS) allocate(factor(prim_num)) @@ -207,7 +218,7 @@ subroutine export_trexio else factor(1:prim_num) = 1.d0 endif - rc = trexio_write_basis_prim_factor(f, factor) + rc = trexio_write_basis_prim_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) @@ -217,16 +228,16 @@ subroutine export_trexio print *, 'AOs' - rc = trexio_write_ao_num(f, ao_num) + rc = trexio_write_ao_num(f(1), ao_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f, 1) + rc = trexio_write_ao_cartesian(f(1), 1) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f, ao_shell) + rc = trexio_write_ao_shell(f(1), ao_shell) call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, k, l, nz + integer :: i, pow0(3), powA(3), j, l, nz double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c nz=100 @@ -243,7 +254,7 @@ subroutine export_trexio else factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f, factor) + rc = trexio_write_ao_normalization(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) @@ -253,21 +264,21 @@ subroutine export_trexio if (export_ao_one_e_ints) then print *, 'AO one-e integrals' - rc = trexio_write_ao_1e_int_overlap(f,ao_overlap) + rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_1e_int_kinetic(f,ao_kinetic_integrals) + rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_1e_int_potential_n_e(f,ao_integrals_n_e) + rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e) call trexio_assert(rc, TREXIO_SUCCESS) if (do_pseudo) then - rc = trexio_write_ao_1e_int_ecp(f, ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_ao_1e_int_core_hamiltonian(f,ao_one_e_integrals) + rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals) call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -303,7 +314,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -314,7 +325,7 @@ subroutine export_trexio end do if (icount >= 0_8) then - rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -328,7 +339,7 @@ subroutine export_trexio if (export_ao_two_e_ints_cholesky) then print *, 'AO two-e integrals Cholesky' - rc = trexio_write_ao_2e_int_eri_cholesky_num(f, cholesky_ao_num) + rc = trexio_write_ao_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) call trexio_assert(rc, TREXIO_SUCCESS) icount = 0_8 @@ -344,7 +355,7 @@ subroutine export_trexio chol_index(2,icount) = j chol_index(3,icount) = k if (icount == BUFSIZE) then - rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -354,7 +365,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -366,22 +377,24 @@ subroutine export_trexio print *, 'MOs' - rc = trexio_write_mo_type(f, mo_label, len(trim(mo_label))) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_mo_num(f, mo_num) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - rc = trexio_write_mo_coefficient(f, mo_coef) + rc = trexio_write_mo_coefficient(f(1), mo_coef) call trexio_assert(rc, TREXIO_SUCCESS) if ( (trim(mo_label) == 'Canonical').and. & (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f, fock_matrix_diag_mo) + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f, mo_class, len(mo_class(1))) + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) ! One-e MO integrals @@ -390,18 +403,18 @@ subroutine export_trexio if (export_mo_one_e_ints) then print *, 'MO one-e integrals' - rc = trexio_write_mo_1e_int_kinetic(f,mo_kinetic_integrals) + rc = trexio_write_mo_1e_int_kinetic(f(1),mo_kinetic_integrals) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_mo_1e_int_potential_n_e(f,mo_integrals_n_e) + rc = trexio_write_mo_1e_int_potential_n_e(f(1),mo_integrals_n_e) call trexio_assert(rc, TREXIO_SUCCESS) if (do_pseudo) then - rc = trexio_write_mo_1e_int_ecp(f,mo_pseudo_integrals_local) + rc = trexio_write_mo_1e_int_ecp(f(1),mo_pseudo_integrals_local) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_1e_int_core_hamiltonian(f,mo_one_e_integrals) + rc = trexio_write_mo_1e_int_core_hamiltonian(f(1),mo_one_e_integrals) call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -432,7 +445,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -443,7 +456,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -454,7 +467,7 @@ subroutine export_trexio if (export_mo_two_e_ints_cholesky) then print *, 'MO two-e integrals Cholesky' - rc = trexio_write_mo_2e_int_eri_cholesky_num(f, cholesky_ao_num) + rc = trexio_write_mo_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) call trexio_assert(rc, TREXIO_SUCCESS) icount = 0_8 @@ -470,7 +483,7 @@ subroutine export_trexio chol_index(2,icount) = j chol_index(3,icount) = k if (icount == BUFSIZE) then - rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -480,7 +493,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -489,14 +502,16 @@ subroutine export_trexio ! One-e RDM ! --------- - rc = trexio_write_rdm_1e(f,one_e_dm_mo) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_rdm_1e(f(k),one_e_dm_mo_alpha(:,:,k) + one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_rdm_1e_up(f,one_e_dm_mo_alpha_average) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_rdm_1e_up(f(k),one_e_dm_mo_alpha(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_rdm_1e_dn(f,one_e_dm_mo_beta_average) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_rdm_1e_dn(f(k),one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo ! Two-e RDM @@ -521,7 +536,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -532,7 +547,7 @@ subroutine export_trexio end do if (icount >= 0_8) then - rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -547,56 +562,60 @@ subroutine export_trexio double precision, allocatable :: coef_buffer(:,:) integer :: nint -! rc = trexio_read_determinant_int64_num(f, nint) -! call trexio_assert(rc, TREXIO_SUCCESS) - nint = N_int + rc = trexio_get_int64_num(f(1), nint) + call trexio_assert(rc, TREXIO_SUCCESS) +! nint = N_int if (nint /= N_int) then stop 'Problem with N_int' endif allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) - icount = 0_8 - offset = 0_8 - rc = trexio_write_state_num(f, n_states) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1, N_states + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f(k), n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + +! Will need to be updated with TREXIO 2.4 +! rc = trexio_write_state_id(f(k), k-1) + rc = trexio_write_state_id(f(k), k) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_state_file_name(f(k), filenames, len(filenames(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - rc = trexio_set_state (f, 0) - call trexio_assert(rc, TREXIO_SUCCESS) do k=1,n_det icount += 1_8 det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) if (icount == BUFSIZE) then - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_list(f, offset, icount, det_buffer) - call trexio_assert(rc, TREXIO_SUCCESS) do i=1,N_states - rc = trexio_set_state (f, i-1) + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) end do - rc = trexio_set_state (f, 0) offset += icount icount = 0_8 end if end do if (icount >= 0_8) then - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_list(f, offset, icount, det_buffer) - call trexio_assert(rc, TREXIO_SUCCESS) - do i=1,N_states - rc = trexio_set_state (f, i-1) - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) - end do - rc = trexio_set_state (f, 0) + do i=1,N_states + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) + call trexio_assert(rc, TREXIO_SUCCESS) + end do end if deallocate ( det_buffer, coef_buffer ) - rc = trexio_close(f) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_close(f(k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo end diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 6bd115a2..99be1f54 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -30,7 +30,6 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) -! * 2.d0 END_PROVIDER From 3aae1dbf77f20ef9a2e46adb6462beabca8ab8ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:06:07 +0200 Subject: [PATCH 19/29] fix completion in qp set_file --- etc/qp.rc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/qp.rc b/etc/qp.rc index d339f475..9eec4570 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) From 0fa576f90930bcf0ffc7a933a42d5667071ad3cd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:51:17 +0200 Subject: [PATCH 20/29] Accelerated (T) --- src/ccsd/ccsd_t_space_orb.irp.f | 244 ++++++++++++++++++++++---------- 1 file changed, 172 insertions(+), 72 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f index 1f1db87e..24b86972 100644 --- a/src/ccsd/ccsd_t_space_orb.irp.f +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -8,15 +8,15 @@ subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: energy - + double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) integer :: i,j,k,a,b,c - + allocate(W(nO,nO,nO,nV,nV,nV)) allocate(V(nO,nO,nO,nV,nV,nV)) - call form_w(nO,nV,t2,W) + call form_w(nO,nV,t2,W) call form_v(nO,nV,t1,W,V) energy = 0d0 @@ -33,9 +33,9 @@ subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) enddo enddo enddo - + energy = energy / 3d0 - + deallocate(V,W) end @@ -46,7 +46,7 @@ subroutine form_w(nO,nV,t2,W) integer, intent(in) :: nO,nV double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) - + integer :: i,j,k,l,a,b,c,d W = 0d0 @@ -133,7 +133,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - + double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) @@ -141,7 +141,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_ijk - + !allocate(W(nV,nV,nV,nO,nO,nO)) !allocate(V(nV,nV,nV,nO,nO,nO)) allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) @@ -154,10 +154,10 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) - + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) - + !$OMP DO collapse(3) do i = 1, nO do a = 1, nV @@ -181,7 +181,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo !$OMP END DO nowait - + !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & @@ -208,10 +208,10 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo !$OMP END DO nowait - + !v_vvoo(b,c,j,k) * t1(i,a) & !X_vvoo(b,c,k,j) * T1_vo(a,i) & - + !$OMP DO collapse(3) do j = 1, nO do k = 1, nO @@ -267,7 +267,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) call wall_time(tb) write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo - + energy = energy / 3d0 deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) @@ -285,78 +285,178 @@ subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) - + integer :: l,a,b,c,d + double precision, allocatable, dimension(:,:,:) :: X, Y, Z !W = 0d0 !do i = 1, nO ! do j = 1, nO ! do k = 1, nO - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & - !$OMP PRIVATE(a,b,c,d,l) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) - do c = 1, nV - do b = 1, nV - do a = 1, nV - W(a,b,c) = 0d0 + allocate(X(nV,nV,nV)) + allocate(Y(nV,nV,nV)) + allocate(Z(nV,nV,nV)) - do d = 1, nV - !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & - W(a,b,c) = W(a,b,c) & - ! chem (bd|ai) - ! phys - !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj - !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik - !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij - !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj - !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik - enddo - + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do d = 1, nV + Z(d,a,b) = X_vvvo(d,b,a,i) enddo enddo enddo - !$OMP END DO nowait + !$OMP END PARALLEL DO - !$OMP DO collapse(2) + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,k,j), nV, 0.d0, W, nV*nV) + + !$OMP PARALLEL DO do c = 1, nV - do b = 1, nV - do a = 1, nV - - do l = 1, nO - !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & - W(a,b,c) = W(a,b,c) & - ! chem (ck|jl) - ! phys - !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & - !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj - !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik - !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij - !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj - !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik - - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & - - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj - - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik - - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij - - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj - - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik - enddo - + do a = 1, nV + do d = 1, nV + Z(d,a,c) = X_vvvo(d,c,a,i) enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,j,k), nV, 0.d0, Y, nV*nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,k), nV, T_vvoo(1,1,j,i), nV, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,j), nV, X_vvvo(1,1,1,k), nV, 1.d0, W, nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,k), nV, X_vvvo(1,1,1,j), nV, 1.d0, Y, nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,j), nV, T_vvoo(1,1,k,i), nV, 1.d0, W, nV*nV) + + deallocate(Z) + + + allocate(Z(nO,nV,nV)) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,j,k), nO, 1.d0, W, nV*nV) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,k,j), nO, 1.d0, Y, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,c) = T_ovvo(l,c,a,k) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,j), nO, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,j,i), nO, T_ovvo(1,1,1,k), nO, 1.d0, Y, nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,k,i), nO, T_ovvo(1,1,1,j), nO, 1.d0, W, nV) + + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,b) = T_ovvo(l,b,a,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,k), nO, 1.d0, W, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = W(a,b,c) + Y(a,c,b) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(X,Y,Z) + + +! !$OMP PARALLEL & +! !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & +! !$OMP PRIVATE(a,b,c,d,l) & +! !$OMP DEFAULT(NONE) +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! W(a,b,c) = 0.d0 +! +! do d = 1, nV +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (bd|ai) +! ! phys +! !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & +! !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj +! !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik +! !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij +! !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj +! !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik +! + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & +! + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj +! + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik +! + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij +! + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj +! + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO nowait +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! +! do l = 1, nO +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (ck|jl) +! ! phys +! !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & +! !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj +! !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik +! !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij +! !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj +! !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik +! - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & +! - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj +! - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik +! - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij +! - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj +! - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + ! enddo ! enddo !enddo @@ -382,7 +482,7 @@ implicit none !do i = 1, nO ! do j = 1, nO ! do k = 1, nO - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & !$OMP PRIVATE(a,b,c) & @@ -404,7 +504,7 @@ implicit none enddo !$OMP END DO !$OMP END PARALLEL - + ! enddo ! enddo !enddo From 69a76c6dba05188e1856c0e195ada4daa25984f5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:17:07 +0200 Subject: [PATCH 21/29] Added W_abc routines for (T) --- src/ccsd/ccsd_t_space_orb.irp.f | 5 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 252 ++++++++++++++++++++++++++++ 2 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 src/ccsd/ccsd_t_space_orb_abc.irp.f diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f index 24b86972..37f2b484 100644 --- a/src/ccsd/ccsd_t_space_orb.irp.f +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -257,7 +257,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL @@ -426,7 +426,7 @@ subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) ! enddo ! enddo ! !$OMP END DO nowait -! +! ! !$OMP DO collapse(2) ! do c = 1, nV ! do b = 1, nV @@ -510,3 +510,4 @@ implicit none !enddo end + diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f new file mode 100644 index 00000000..3b762a06 --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -0,0 +1,252 @@ +! Main + +subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) + double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) + double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_abc + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) + allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) + call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) + call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) + + call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) + call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) + !$OMP PARALLEL & + !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& + !$OMP PRIVATE(i,j,k,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& + * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + + +subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + + implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W_abc(nO,nO,nO) + + integer :: l,i,j,k,d + + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & + !$OMP PRIVATE(i,j,k,d,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do i = 1, nO + W_abc(i,j,k) = 0.d0 + + do d = 1, nV + W_abc(i,j,k) = W_abc(i,j,k) & + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) + + enddo + + do l = 1, nO + W_abc(i,j,k) = W_abc(i,j,k) & + - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & + - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj + - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik + - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij + - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj + - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + +end + + +! V_abc + +subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) + +implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nO,nO,nO) + double precision, intent(out) :: V(nO,nO,nO) + + integer :: i,j,k + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(i,j,k) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do k = 1, nO + do j = 1, nO + do i = 1, nO + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(i,j,k) = W(i,j,k) & + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + From 85ca8854188289b34010eb431813919df5507aed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:20:08 +0200 Subject: [PATCH 22/29] Fixing github actions --- .github/workflows/compilation.yml | 1 + .github/workflows/configuration.yml | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 42710ce5..85daf7db 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -48,6 +48,7 @@ jobs: ./configure -i docopt || : ./configure -i resultsFile || : ./configure -i bats || : + ./configure -i trexio-nohdf5 || : ./configure -c ./config/gfortran_debug.cfg - name: Compilation run: | diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index 14019e5d..ba37f5dd 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 - name: zlib run: | ./configure -i zlib || echo OK @@ -50,6 +50,12 @@ jobs: - name: bats run: | ./configure -i bats || echo OK + - name: trexio-nohdf5 + run: | + ./configure -i trexio-nohdf5 || echo OK + - name: trexio + run: | + ./configure -i trexio || echo OK - name: Final check run: | ./configure -c config/gfortran_debug.cfg From 6d1cf74e09c10ecd2ba19f9952f2f466a7ad4874 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:58:08 +0200 Subject: [PATCH 23/29] Added NEED in trexio --- src/trexio/NEED | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/trexio/NEED diff --git a/src/trexio/NEED b/src/trexio/NEED new file mode 100644 index 00000000..625463a2 --- /dev/null +++ b/src/trexio/NEED @@ -0,0 +1,8 @@ +ezfio_files +determinants +mo_one_e_ints +mo_two_e_ints +ao_two_e_ints +ao_one_e_ints +two_body_rdm +hartree_fock From a45fe53a9c61dce28602ed086b1d518be00e05aa Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 13 May 2023 09:15:34 +0200 Subject: [PATCH 24/29] compilation on lcpq is fine --- src/cosgtos_ao_int/NEED | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/cosgtos_ao_int/NEED diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED new file mode 100644 index 00000000..932f88a3 --- /dev/null +++ b/src/cosgtos_ao_int/NEED @@ -0,0 +1,2 @@ +ezfio_files +ao_basis From 8cefe6eb441ee16e2710064ed38d11779dbc7d26 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 13 May 2023 12:18:36 +0200 Subject: [PATCH 25/29] save tc coef --- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 ++++++++++++++++++++ src/tc_scf/rotate_tcscf_orbitals.irp.f | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 3140d229..a83d6cd0 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -298,6 +298,26 @@ end print*,' = ', s2_eigvec_tc_bi_orth(i) enddo + double precision, allocatable :: buffer(:,:) + allocate(buffer(N_det,N_states)) + do k = 1, N_states + do i = 1, N_det + psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) + buffer(i,k) = leigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_l_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + do k = 1, N_states + do i = 1, N_det + psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) + buffer(i,k) = reigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_r_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + deallocate(buffer) + END_PROVIDER diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index 31999c18..2567faf0 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -17,7 +17,8 @@ program rotate_tcscf_orbitals bi_ortho = .True. touch bi_ortho - call maximize_overlap() + call minimize_tc_orb_angles() + !call maximize_overlap() end From 19f2ede59c95cee5e34a49d960b894c5765805fe Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 13 May 2023 21:43:01 +0200 Subject: [PATCH 26/29] check TC energy after rotations --- src/tc_scf/routines_rotates.irp.f | 31 ++++++++++++++++++++++++++++++- src/tc_scf/tc_scf.irp.f | 2 +- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 8c1071b2..4ac66b5f 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -93,14 +93,22 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) integer :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst double precision :: max_angle, norm + double precision :: E_old, E_new, E_thr integer, allocatable :: list_degen(:,:) double precision, allocatable :: new_angles(:) + double precision, allocatable :: mo_r_coef_old(:,:), mo_l_coef_old(:,:) double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:) double precision, allocatable :: mo_r_coef_new(:,:) - double precision, allocatable :: fock_diag(:),s_mat(:,:) + double precision, allocatable :: fock_diag(:), s_mat(:,:) double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) + E_thr = 1d-8 + E_old = TC_HF_energy + allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num)) + mo_r_coef_old = mo_r_coef + mo_l_coef_old = mo_l_coef + good_angles = .False. allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) @@ -253,11 +261,32 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef + ! check if TC energy has changed + E_new = TC_HF_energy + if(dabs(E_new - E_old) .gt. E_thr) then + mo_r_coef = mo_r_coef_old + mo_l_coef = mo_l_coef_old + deallocate(mo_l_coef_old, mo_r_coef_old) + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + print*, ' TC energy bef rotation = ', E_old + print*, ' TC energy aft rotation = ', E_new + print*, ' the rotation is refused' + stop + endif + allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) good_angles = max_angle.lt.45.d0 print *, ' max_angle = ', max_angle + deallocate(new_angles) + + + deallocate(mo_l_coef_old, mo_r_coef_old) + deallocate(mo_l_coef_good, mo_r_coef_good) + deallocate(mo_r_coef_new) end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 2485ee8b..88ddd26c 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -53,7 +53,7 @@ program tc_scf stop endif - !call minimize_tc_orb_angles() + call minimize_tc_orb_angles() endif From c9f579483af138575cb7a37b9cf1fdb39f22c2fa Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 15 May 2023 00:31:28 +0200 Subject: [PATCH 27/29] added Gill grid --- src/ao_many_one_e_ints/listj1b.irp.f | 24 +- src/becke_numerical_grid/EZFIO.cfg | 12 + src/becke_numerical_grid/extra_grid.irp.f | 278 +++++++++++----- .../extra_grid_vector.irp.f | 15 +- src/becke_numerical_grid/grid_becke.irp.f | 314 ++++++++++++------ .../grid_becke_vector.irp.f | 35 +- .../integration_radial.irp.f | 142 ++++---- .../step_function_becke.irp.f | 43 ++- src/non_h_ints_mu/j12_nucl_utils.irp.f | 4 +- 9 files changed, 570 insertions(+), 297 deletions(-) diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 93ac459e..02963605 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -7,17 +7,17 @@ BEGIN_PROVIDER [integer, List_all_comb_b2_size] PROVIDE j1b_type - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then List_all_comb_b2_size = 2**nucl_num - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then List_all_comb_b2_size = nucl_num + 1 else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif @@ -67,7 +67,7 @@ END_PROVIDER List_all_comb_b2_expo = 0.d0 List_all_comb_b2_cent = 0.d0 - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then do i = 1, List_all_comb_b2_size @@ -121,7 +121,7 @@ END_PROVIDER List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) enddo - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then List_all_comb_b2_coef( 1) = 1.d0 List_all_comb_b2_expo( 1) = 0.d0 @@ -136,7 +136,7 @@ END_PROVIDER else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif @@ -156,18 +156,18 @@ BEGIN_PROVIDER [ integer, List_all_comb_b3_size] implicit none double precision :: tmp - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then List_all_comb_b3_size = 3**nucl_num - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then tmp = 0.5d0 * dble(nucl_num) * (dble(nucl_num) + 3.d0) List_all_comb_b3_size = int(tmp) + 1 else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif @@ -230,7 +230,7 @@ END_PROVIDER List_all_comb_b3_expo = 0.d0 List_all_comb_b3_cent = 0.d0 - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then do i = 1, List_all_comb_b3_size @@ -287,7 +287,7 @@ END_PROVIDER List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) enddo - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then ii = 1 List_all_comb_b3_coef( ii) = 1.d0 @@ -347,7 +347,7 @@ END_PROVIDER else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif diff --git a/src/becke_numerical_grid/EZFIO.cfg b/src/becke_numerical_grid/EZFIO.cfg index 4083e0e7..7861f074 100644 --- a/src/becke_numerical_grid/EZFIO.cfg +++ b/src/becke_numerical_grid/EZFIO.cfg @@ -64,3 +64,15 @@ doc: Number of angular extra_grid points given from input. Warning, this number interface: ezfio,provider,ocaml default: 1202 +[rad_grid_type] +type: character*(32) +doc: method used to sample the radial space. Possible choices are [KNOWLES | GILL] +interface: ezfio,provider,ocaml +default: KNOWLES + +[extra_rad_grid_type] +type: character*(32) +doc: method used to sample the radial space. Possible choices are [KNOWLES | GILL] +interface: ezfio,provider,ocaml +default: KNOWLES + diff --git a/src/becke_numerical_grid/extra_grid.irp.f b/src/becke_numerical_grid/extra_grid.irp.f index db691e55..9bd24f22 100644 --- a/src/becke_numerical_grid/extra_grid.irp.f +++ b/src/becke_numerical_grid/extra_grid.irp.f @@ -1,96 +1,149 @@ +! --- + BEGIN_PROVIDER [integer, n_points_extra_radial_grid] &BEGIN_PROVIDER [integer, n_points_extra_integration_angular] - implicit none - BEGIN_DOC - ! n_points_extra_radial_grid = number of radial grid points_extra per atom - ! - ! n_points_extra_integration_angular = number of angular grid points_extra per atom - ! - ! These numbers are automatically set by setting the grid_type_sgn parameter - END_DOC -if(.not.my_extra_grid_becke)then - select case (extra_grid_type_sgn) - case(0) - n_points_extra_radial_grid = 23 - n_points_extra_integration_angular = 170 - case(1) - n_points_extra_radial_grid = 50 - n_points_extra_integration_angular = 194 - case(2) - n_points_extra_radial_grid = 75 - n_points_extra_integration_angular = 302 - case(3) - n_points_extra_radial_grid = 99 - n_points_extra_integration_angular = 590 - case default - write(*,*) '!!! Quadrature grid not available !!!' - stop - end select -else - n_points_extra_radial_grid = my_n_pt_r_extra_grid - n_points_extra_integration_angular = my_n_pt_a_extra_grid -endif + + BEGIN_DOC + ! n_points_extra_radial_grid = number of radial grid points_extra per atom + ! + ! n_points_extra_integration_angular = number of angular grid points_extra per atom + ! + ! These numbers are automatically set by setting the grid_type_sgn parameter + END_DOC + + implicit none + + if(.not.my_extra_grid_becke)then + select case (extra_grid_type_sgn) + case(0) + n_points_extra_radial_grid = 23 + n_points_extra_integration_angular = 170 + case(1) + n_points_extra_radial_grid = 50 + n_points_extra_integration_angular = 194 + case(2) + n_points_extra_radial_grid = 75 + n_points_extra_integration_angular = 302 + case(3) + n_points_extra_radial_grid = 99 + n_points_extra_integration_angular = 590 + case default + write(*,*) '!!! Quadrature grid not available !!!' + stop + end select + else + n_points_extra_radial_grid = my_n_pt_r_extra_grid + n_points_extra_integration_angular = my_n_pt_a_extra_grid + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_points_extra_grid_per_atom] - implicit none + BEGIN_DOC ! Number of grid points_extra per atom END_DOC + + implicit none n_points_extra_grid_per_atom = n_points_extra_integration_angular * n_points_extra_radial_grid END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_extra_radial, (n_points_extra_radial_grid)] &BEGIN_PROVIDER [double precision, dr_radial_extra_integral] - implicit none BEGIN_DOC ! points_extra in [0,1] to map the radial integral [0,\infty] END_DOC + + implicit none + integer :: i + dr_radial_extra_integral = 1.d0/dble(n_points_extra_radial_grid-1) - integer :: i do i = 1, n_points_extra_radial_grid grid_points_extra_radial(i) = dble(i-1) * dr_radial_extra_integral enddo END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_extra_per_atom, (3,n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num)] + BEGIN_DOC ! x,y,z coordinates of grid points_extra used for integration in 3d space END_DOC + implicit none - integer :: i,j,k - double precision :: dr,x_ref,y_ref,z_ref - double precision :: knowles_function - do i = 1, nucl_num - x_ref = nucl_coord(i,1) - y_ref = nucl_coord(i,2) - z_ref = nucl_coord(i,3) - do j = 1, n_points_extra_radial_grid-1 - double precision :: x,r - ! x value for the mapping of the [0, +\infty] to [0,1] - x = grid_points_extra_radial(j) + integer :: i, j, k + double precision :: dr, x_ref, y_ref, z_ref + double precision :: x, r, tmp + double precision, external :: knowles_function - ! value of the radial coordinate for the integration - r = knowles_function(alpha_knowles(grid_atomic_number(i)),m_knowles,x) + grid_points_extra_per_atom = 0.d0 - ! explicit values of the grid points_extra centered around each atom - do k = 1, n_points_extra_integration_angular - grid_points_extra_per_atom(1,k,j,i) = & - x_ref + angular_quadrature_points_extra(k,1) * r - grid_points_extra_per_atom(2,k,j,i) = & - y_ref + angular_quadrature_points_extra(k,2) * r - grid_points_extra_per_atom(3,k,j,i) = & - z_ref + angular_quadrature_points_extra(k,3) * r + PROVIDE extra_rad_grid_type + if(extra_rad_grid_type .eq. "KNOWLES") then + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_extra_radial_grid-1 + + ! x value for the mapping of the [0, +\infty] to [0,1] + x = grid_points_extra_radial(j) + ! value of the radial coordinate for the integration + r = knowles_function(alpha_knowles(grid_atomic_number(i)), m_knowles, x) + + ! explicit values of the grid points_extra centered around each atom + do k = 1, n_points_extra_integration_angular + grid_points_extra_per_atom(1,k,j,i) = x_ref + angular_quadrature_points_extra(k,1) * r + grid_points_extra_per_atom(2,k,j,i) = y_ref + angular_quadrature_points_extra(k,2) * r + grid_points_extra_per_atom(3,k,j,i) = z_ref + angular_quadrature_points_extra(k,3) * r + enddo enddo enddo - enddo + + elseif(extra_rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_extra_radial_grid-1 + + r = R_gill * dble(j-1)**2 / dble(n_points_extra_radial_grid-j+1)**2 + + ! explicit values of the grid points_extra centered around each atom + do k = 1, n_points_extra_integration_angular + grid_points_extra_per_atom(1,k,j,i) = x_ref + angular_quadrature_points_extra(k,1) * r + grid_points_extra_per_atom(2,k,j,i) = y_ref + angular_quadrature_points_extra(k,2) * r + grid_points_extra_per_atom(3,k,j,i) = z_ref + angular_quadrature_points_extra(k,3) * r + enddo + enddo + enddo + + else + + print*, " extra_rad_grid_type = ", extra_rad_grid_type, ' is not implemented' + stop + + endif + + END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] + BEGIN_DOC ! Weight function at grid points_extra : w_n(r) according to the equation (22) ! of Becke original paper (JCP, 88, 1988) @@ -99,11 +152,14 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration ! represented by the last dimension and the points_extra are labelled by the ! other dimensions. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: accu + double precision :: tmp_array(nucl_num) + double precision, external :: cell_function_becke + ! run over all points_extra in space ! that are referred to each atom do j = 1, nucl_num @@ -114,6 +170,7 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration r(1) = grid_points_extra_per_atom(1,l,k,j) r(2) = grid_points_extra_per_atom(2,l,k,j) r(3) = grid_points_extra_per_atom(3,l,k,j) + accu = 0.d0 ! For each of these points_extra in space, ou need to evaluate the P_n(r) do i = 1, nucl_num @@ -124,18 +181,19 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration enddo accu = 1.d0/accu weight_at_r_extra(l,k,j) = tmp_array(j) * accu + if(isnan(weight_at_r_extra(l,k,j)))then - print*,'isnan(weight_at_r_extra(l,k,j))' - print*,l,k,j - accu = 0.d0 - do i = 1, nucl_num - ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) - print*,i,tmp_array(i) - ! Then you compute the summ the P_n(r) function for each of the "r" points_extra - accu += tmp_array(i) - enddo - write(*,'(100(F16.10,X))')tmp_array(j) , accu + print*,'isnan(weight_at_r_extra(l,k,j))' + print*,l,k,j + accu = 0.d0 + do i = 1, nucl_num + ! function defined for each atom "i" by equation (13) and (21) with k == 3 + tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + print*,i,tmp_array(i) + ! Then you compute the summ the P_n(r) function for each of the "r" points_extra + accu += tmp_array(i) + enddo + write(*,'(100(F16.10,X))')tmp_array(j) , accu stop endif enddo @@ -144,35 +202,73 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_weight_at_r_extra, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] + BEGIN_DOC ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) - double precision :: contrib_integration,x - double precision :: derivative_knowles_function,knowles_function - ! run over all points_extra in space - do j = 1, nucl_num ! that are referred to each atom - do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom - x = grid_points_extra_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] - do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom - contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& - *knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 - final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration * dr_radial_extra_integral - if(isnan(final_weight_at_r_extra(k,i,j)))then - print*,'isnan(final_weight_at_r_extra(k,i,j))' - print*,k,i,j - write(*,'(100(F16.10,X))')weights_angular_points_extra(k) , weight_at_r_extra(k,i,j) , contrib_integration , dr_radial_extra_integral - stop - endif + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: tmp_array(nucl_num) + double precision :: contrib_integration, x, tmp + double precision, external :: derivative_knowles_function, knowles_function + + PROVIDE extra_rad_grid_type + if(extra_rad_grid_type .eq. "KNOWLES") then + + ! run over all points_extra in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom + x = grid_points_extra_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] + do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom + contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& + * knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 + final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration * dr_radial_extra_integral + if(isnan(final_weight_at_r_extra(k,i,j)))then + print*,'isnan(final_weight_at_r_extra(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))')weights_angular_points_extra(k) , weight_at_r_extra(k,i,j) , contrib_integration , dr_radial_extra_integral + stop + endif + enddo enddo enddo - enddo + + elseif(extra_rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + PROVIDE R_gill + tmp = 2.d0 * R_gill * R_gill * R_gill * dble(n_points_extra_radial_grid) + + ! run over all points_extra in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom + contrib_integration = tmp * dble(i-1)**5 / dble(n_points_extra_radial_grid-i+1)**7 + + do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom + final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration + if(isnan(final_weight_at_r_extra(k,i,j)))then + print*,'isnan(final_weight_at_r_extra(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points_extra(k), weight_at_r_extra(k,i,j), contrib_integration + stop + endif + enddo + enddo + enddo + + else + + print*, " extra_rad_grid_type = ", extra_rad_grid_type, ' is not implemented' + stop + + endif + END_PROVIDER diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index 3a5e6d3c..e4fc03b5 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -1,26 +1,35 @@ +! --- + BEGIN_PROVIDER [integer, n_points_extra_final_grid] - implicit none + BEGIN_DOC ! Number of points_extra which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_extra_final_grid = 0 + do j = 1, nucl_num do i = 1, n_points_extra_radial_grid -1 do k = 1, n_points_extra_integration_angular - if(dabs(final_weight_at_r_extra(k,i,j)) < thresh_extra_grid)then + if(dabs(final_weight_at_r_extra(k,i,j)) < thresh_extra_grid) then cycle endif n_points_extra_final_grid += 1 enddo enddo enddo + print*,'n_points_extra_final_grid = ',n_points_extra_final_grid print*,'n max point = ',n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) ! call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_grid_points_extra, (3,n_points_extra_final_grid)] &BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid) ] &BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid) ] diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 79f15c9a..21b9f98d 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -1,103 +1,174 @@ + +! --- + BEGIN_PROVIDER [integer, n_points_radial_grid] &BEGIN_PROVIDER [integer, n_points_integration_angular] - implicit none - BEGIN_DOC - ! n_points_radial_grid = number of radial grid points per atom - ! - ! n_points_integration_angular = number of angular grid points per atom - ! - ! These numbers are automatically set by setting the grid_type_sgn parameter - END_DOC -if(.not.my_grid_becke)then - select case (grid_type_sgn) - case(0) - n_points_radial_grid = 23 - n_points_integration_angular = 170 - case(1) - n_points_radial_grid = 50 - n_points_integration_angular = 194 - case(2) - n_points_radial_grid = 75 - n_points_integration_angular = 302 - case(3) - n_points_radial_grid = 99 - n_points_integration_angular = 590 - case default - write(*,*) '!!! Quadrature grid not available !!!' - stop - end select -else - n_points_radial_grid = my_n_pt_r_grid - n_points_integration_angular = my_n_pt_a_grid -endif + + BEGIN_DOC + ! n_points_radial_grid = number of radial grid points per atom + ! + ! n_points_integration_angular = number of angular grid points per atom + ! + ! These numbers are automatically set by setting the grid_type_sgn parameter + END_DOC + + implicit none + + if(.not.my_grid_becke)then + select case (grid_type_sgn) + case(0) + n_points_radial_grid = 23 + n_points_integration_angular = 170 + case(1) + n_points_radial_grid = 50 + n_points_integration_angular = 194 + case(2) + n_points_radial_grid = 75 + n_points_integration_angular = 302 + case(3) + n_points_radial_grid = 99 + n_points_integration_angular = 590 + case default + write(*,*) '!!! Quadrature grid not available !!!' + stop + end select + else + n_points_radial_grid = my_n_pt_r_grid + n_points_integration_angular = my_n_pt_a_grid + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_points_grid_per_atom] - implicit none + BEGIN_DOC ! Number of grid points per atom END_DOC + + implicit none + n_points_grid_per_atom = n_points_integration_angular * n_points_radial_grid END_PROVIDER -BEGIN_PROVIDER [integer , m_knowles] - implicit none +! --- + +BEGIN_PROVIDER [integer, m_knowles] + BEGIN_DOC ! value of the "m" parameter in the equation (7) of the paper of Knowles (JCP, 104, 1996) END_DOC + + implicit none + m_knowles = 3 + END_PROVIDER +! --- + +BEGIN_PROVIDER [double precision, R_gill] + + implicit none + + R_gill = 3.d0 + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, grid_points_radial, (n_points_radial_grid)] &BEGIN_PROVIDER [double precision, dr_radial_integral] - implicit none BEGIN_DOC ! points in [0,1] to map the radial integral [0,\infty] END_DOC - dr_radial_integral = 1.d0/dble(n_points_radial_grid-1) - integer :: i + + implicit none + integer :: i + + dr_radial_integral = 1.d0 / dble(n_points_radial_grid-1) + do i = 1, n_points_radial_grid grid_points_radial(i) = dble(i-1) * dr_radial_integral enddo END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_per_atom, (3,n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC ! x,y,z coordinates of grid points used for integration in 3d space END_DOC + implicit none - integer :: i,j,k - double precision :: dr,x_ref,y_ref,z_ref - double precision :: knowles_function - do i = 1, nucl_num - x_ref = nucl_coord(i,1) - y_ref = nucl_coord(i,2) - z_ref = nucl_coord(i,3) - do j = 1, n_points_radial_grid-1 - double precision :: x,r - ! x value for the mapping of the [0, +\infty] to [0,1] - x = grid_points_radial(j) + integer :: i, j, k + double precision :: dr, x_ref, y_ref, z_ref + double precision :: x, r, tmp + double precision, external :: knowles_function - ! value of the radial coordinate for the integration - r = knowles_function(alpha_knowles(grid_atomic_number(i)),m_knowles,x) + grid_points_per_atom = 0.d0 - ! explicit values of the grid points centered around each atom - do k = 1, n_points_integration_angular - grid_points_per_atom(1,k,j,i) = & - x_ref + angular_quadrature_points(k,1) * r - grid_points_per_atom(2,k,j,i) = & - y_ref + angular_quadrature_points(k,2) * r - grid_points_per_atom(3,k,j,i) = & - z_ref + angular_quadrature_points(k,3) * r + PROVIDE rad_grid_type + if(rad_grid_type .eq. "KNOWLES") then + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_radial_grid-1 + + ! x value for the mapping of the [0, +\infty] to [0,1] + x = grid_points_radial(j) + ! value of the radial coordinate for the integration + r = knowles_function(alpha_knowles(grid_atomic_number(i)), m_knowles, x) + + ! explicit values of the grid points centered around each atom + do k = 1, n_points_integration_angular + grid_points_per_atom(1,k,j,i) = x_ref + angular_quadrature_points(k,1) * r + grid_points_per_atom(2,k,j,i) = y_ref + angular_quadrature_points(k,2) * r + grid_points_per_atom(3,k,j,i) = z_ref + angular_quadrature_points(k,3) * r + enddo enddo enddo - enddo + + elseif(rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_radial_grid-1 + + r = R_gill * dble(j-1)**2 / dble(n_points_radial_grid-j+1)**2 + + ! explicit values of the grid points centered around each atom + do k = 1, n_points_integration_angular + grid_points_per_atom(1,k,j,i) = x_ref + angular_quadrature_points(k,1) * r + grid_points_per_atom(2,k,j,i) = y_ref + angular_quadrature_points(k,2) * r + grid_points_per_atom(3,k,j,i) = z_ref + angular_quadrature_points(k,3) * r + enddo + enddo + enddo + + else + + print*, " rad_grid_type = ", rad_grid_type, ' is not implemented' + stop + + endif + END_PROVIDER -BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] +! --- + +BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC ! Weight function at grid points : w_n(r) according to the equation (22) ! of Becke original paper (JCP, 88, 1988) @@ -106,11 +177,13 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p ! represented by the last dimension and the points are labelled by the ! other dimensions. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) + integer :: i, j, k, l, m + double precision :: r(3), accu + double precision :: tmp_array(nucl_num) + double precision, external :: cell_function_becke + ! run over all points in space ! that are referred to each atom do j = 1, nucl_num @@ -121,28 +194,30 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p r(1) = grid_points_per_atom(1,l,k,j) r(2) = grid_points_per_atom(2,l,k,j) r(3) = grid_points_per_atom(3,l,k,j) + accu = 0.d0 ! For each of these points in space, ou need to evaluate the P_n(r) do i = 1, nucl_num ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + tmp_array(i) = cell_function_becke(r, i) ! P_n(r) ! Then you compute the summ the P_n(r) function for each of the "r" points accu += tmp_array(i) enddo accu = 1.d0/accu weight_at_r(l,k,j) = tmp_array(j) * accu - if(isnan(weight_at_r(l,k,j)))then - print*,'isnan(weight_at_r(l,k,j))' - print*,l,k,j - accu = 0.d0 - do i = 1, nucl_num - ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) - print*,i,tmp_array(i) - ! Then you compute the summ the P_n(r) function for each of the "r" points - accu += tmp_array(i) - enddo - write(*,'(100(F16.10,X))')tmp_array(j) , accu + + if(isnan(weight_at_r(l,k,j))) then + print*,'isnan(weight_at_r(l,k,j))' + print*,l,k,j + accu = 0.d0 + do i = 1, nucl_num + ! function defined for each atom "i" by equation (13) and (21) with k == 3 + tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + print*,i,tmp_array(i) + ! Then you compute the summ the P_n(r) function for each of the "r" points + accu += tmp_array(i) + enddo + write(*,'(100(F16.10,X))')tmp_array(j) , accu stop endif enddo @@ -151,35 +226,76 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p END_PROVIDER +! --- + +BEGIN_PROVIDER [double precision, final_weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num)] -BEGIN_PROVIDER [double precision, final_weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] BEGIN_DOC - ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. + ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) - double precision :: contrib_integration,x - double precision :: derivative_knowles_function,knowles_function - ! run over all points in space - do j = 1, nucl_num ! that are referred to each atom - do i = 1, n_points_radial_grid -1 !for each radial grid attached to the "jth" atom - x = grid_points_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] - do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom - contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& - *knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 - final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration * dr_radial_integral - if(isnan(final_weight_at_r(k,i,j)))then - print*,'isnan(final_weight_at_r(k,i,j))' - print*,k,i,j - write(*,'(100(F16.10,X))')weights_angular_points(k) , weight_at_r(k,i,j) , contrib_integration , dr_radial_integral - stop - endif + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: tmp_array(nucl_num) + double precision :: contrib_integration, x, tmp + double precision, external :: derivative_knowles_function, knowles_function + + final_weight_at_r = 0.d0 + + PROVIDE rad_grid_type + if(rad_grid_type .eq. "KNOWLES") then + + ! run over all points in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_radial_grid -1 !for each radial grid attached to the "jth" atom + x = grid_points_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] + + do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom + contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)), m_knowles, x) & + * knowles_function(alpha_knowles(grid_atomic_number(j)), m_knowles, x)**2 + + final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration * dr_radial_integral + + if(isnan(final_weight_at_r(k,i,j))) then + print*,'isnan(final_weight_at_r(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points(k), weight_at_r(k,i,j), contrib_integration + stop + endif + enddo enddo enddo - enddo + + elseif(rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + tmp = 2.d0 * R_gill * R_gill * R_gill * dble(n_points_radial_grid) + + ! run over all points in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_radial_grid - 1 !for each radial grid attached to the "jth" atom + contrib_integration = tmp * dble(i-1)**5 / dble(n_points_radial_grid-i+1)**7 + do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom + final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration + + if(isnan(final_weight_at_r(k,i,j))) then + print*,'isnan(final_weight_at_r(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points(k), weight_at_r(k,i,j), contrib_integration, dr_radial_integral + stop + endif + enddo + enddo + enddo + + else + + print*, " rad_grid_type = ", rad_grid_type, ' is not implemented' + stop + + endif END_PROVIDER + diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 343bd054..fd185641 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -21,22 +21,27 @@ BEGIN_PROVIDER [integer, n_points_final_grid] call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER - BEGIN_PROVIDER [double precision, final_grid_points, (3,n_points_final_grid)] -&BEGIN_PROVIDER [double precision, final_weight_at_r_vector, (n_points_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points, (3,n_points_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_reverse, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] - implicit none +! --- + + BEGIN_PROVIDER [double precision, final_grid_points, (3,n_points_final_grid)] +&BEGIN_PROVIDER [double precision, final_weight_at_r_vector, (n_points_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points, (3,n_points_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_reverse, (n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC -! final_grid_points(1:3,j) = (/ x, y, z /) of the jth grid point -! -! final_weight_at_r_vector(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions -! -! index_final_points(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point -! -! index_final_points_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices + ! final_grid_points(1:3,j) = (/ x, y, z /) of the jth grid point + ! + ! final_weight_at_r_vector(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions + ! + ! index_final_points(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point + ! + ! index_final_points_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices END_DOC - integer :: i,j,k,l,i_count - double precision :: r(3) + + implicit none + integer :: i, j, k, l, i_count + double precision :: r(3) + i_count = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -59,6 +64,8 @@ END_PROVIDER END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] implicit none BEGIN_DOC diff --git a/src/becke_numerical_grid/integration_radial.irp.f b/src/becke_numerical_grid/integration_radial.irp.f index 44c83070..3de151ab 100644 --- a/src/becke_numerical_grid/integration_radial.irp.f +++ b/src/becke_numerical_grid/integration_radial.irp.f @@ -1,71 +1,93 @@ - double precision function knowles_function(alpha,m,x) - implicit none - BEGIN_DOC -! Function proposed by Knowles (JCP, 104, 1996) for distributing the radial points : -! the Log "m" function ( equation (7) in the paper ) - END_DOC - double precision, intent(in) :: alpha,x - integer, intent(in) :: m -!print*, x - knowles_function = -alpha * dlog(1.d0-x**m) - end - double precision function derivative_knowles_function(alpha,m,x) - implicit none - BEGIN_DOC -! Derivative of the function proposed by Knowles (JCP, 104, 1996) for distributing the radial points - END_DOC - double precision, intent(in) :: alpha,x - integer, intent(in) :: m - double precision :: f - f = x**(m-1) - derivative_knowles_function = alpha * dble(m) * f / (1.d0 - x*f) - end +! --- - BEGIN_PROVIDER [double precision, alpha_knowles, (100)] - implicit none - integer :: i - BEGIN_DOC -! Recommended values for the alpha parameters according to the paper of Knowles (JCP, 104, 1996) -! as a function of the nuclear charge - END_DOC +double precision function knowles_function(alpha, m, x) - ! H-He - alpha_knowles(1) = 5.d0 - alpha_knowles(2) = 5.d0 + BEGIN_DOC + ! Function proposed by Knowles (JCP, 104, 1996) for distributing the radial points : + ! the Log "m" function ( equation (7) in the paper ) + END_DOC + + implicit none + double precision, intent(in) :: alpha, x + integer, intent(in) :: m - ! Li-Be - alpha_knowles(3) = 7.d0 - alpha_knowles(4) = 7.d0 + !print*, x + knowles_function = -alpha * dlog(1.d0-x**m) - ! B-Ne - do i = 5, 10 - alpha_knowles(i) = 5.d0 - enddo + return +end - ! Na-Mg - do i = 11, 12 - alpha_knowles(i) = 7.d0 - enddo +! --- - ! Al-Ar - do i = 13, 18 - alpha_knowles(i) = 5.d0 - enddo +double precision function derivative_knowles_function(alpha, m, x) - ! K-Ca - do i = 19, 20 - alpha_knowles(i) = 7.d0 - enddo + BEGIN_DOC + ! Derivative of the function proposed by Knowles (JCP, 104, 1996) for distributing the radial points + END_DOC - ! Sc-Zn - do i = 21, 30 - alpha_knowles(i) = 5.d0 - enddo + implicit none + double precision, intent(in) :: alpha, x + integer, intent(in) :: m + double precision :: f - ! Ga-Kr - do i = 31, 100 - alpha_knowles(i) = 7.d0 - enddo + f = x**(m-1) + derivative_knowles_function = alpha * dble(m) * f / (1.d0 - x*f) + + return +end + +! --- + +BEGIN_PROVIDER [double precision, alpha_knowles, (100)] + + BEGIN_DOC + ! Recommended values for the alpha parameters according to the paper of Knowles (JCP, 104, 1996) + ! as a function of the nuclear charge + END_DOC + + implicit none + integer :: i + + ! H-He + alpha_knowles(1) = 5.d0 + alpha_knowles(2) = 5.d0 + + ! Li-Be + alpha_knowles(3) = 7.d0 + alpha_knowles(4) = 7.d0 + + ! B-Ne + do i = 5, 10 + alpha_knowles(i) = 5.d0 + enddo + + ! Na-Mg + do i = 11, 12 + alpha_knowles(i) = 7.d0 + enddo + + ! Al-Ar + do i = 13, 18 + alpha_knowles(i) = 5.d0 + enddo + + ! K-Ca + do i = 19, 20 + alpha_knowles(i) = 7.d0 + enddo + + ! Sc-Zn + do i = 21, 30 + alpha_knowles(i) = 5.d0 + enddo + + ! Ga-Kr + do i = 31, 100 + alpha_knowles(i) = 7.d0 + enddo + +END_PROVIDER + +! --- - END_PROVIDER diff --git a/src/becke_numerical_grid/step_function_becke.irp.f b/src/becke_numerical_grid/step_function_becke.irp.f index 2905c6c0..6048c35f 100644 --- a/src/becke_numerical_grid/step_function_becke.irp.f +++ b/src/becke_numerical_grid/step_function_becke.irp.f @@ -20,31 +20,42 @@ double precision function f_function_becke(x) f_function_becke = 1.5d0 * x - 0.5d0 * x*x*x end -double precision function cell_function_becke(r,atom_number) - implicit none - double precision, intent(in) :: r(3) - integer, intent(in) :: atom_number +! --- + +double precision function cell_function_becke(r, atom_number) + BEGIN_DOC -! atom_number :: atom on which the cell function of Becke (1988, JCP,88(4)) + ! atom_number :: atom on which the cell function of Becke (1988, JCP,88(4)) ! r(1:3) :: x,y,z coordinantes of the current point END_DOC - double precision :: mu_ij,nu_ij - double precision :: distance_i,distance_j,step_function_becke - integer :: j - distance_i = (r(1) - nucl_coord_transp(1,atom_number) ) * (r(1) - nucl_coord_transp(1,atom_number)) + + implicit none + double precision, intent(in) :: r(3) + integer, intent(in) :: atom_number + integer :: j + double precision :: mu_ij, nu_ij + double precision :: distance_i, distance_j, step_function_becke + + distance_i = (r(1) - nucl_coord_transp(1,atom_number) ) * (r(1) - nucl_coord_transp(1,atom_number)) distance_i += (r(2) - nucl_coord_transp(2,atom_number) ) * (r(2) - nucl_coord_transp(2,atom_number)) distance_i += (r(3) - nucl_coord_transp(3,atom_number) ) * (r(3) - nucl_coord_transp(3,atom_number)) - distance_i = dsqrt(distance_i) + distance_i = dsqrt(distance_i) + cell_function_becke = 1.d0 do j = 1, nucl_num - if(j==atom_number)cycle - distance_j = (r(1) - nucl_coord_transp(1,j) ) * (r(1) - nucl_coord_transp(1,j)) - distance_j+= (r(2) - nucl_coord_transp(2,j) ) * (r(2) - nucl_coord_transp(2,j)) - distance_j+= (r(3) - nucl_coord_transp(3,j) ) * (r(3) - nucl_coord_transp(3,j)) - distance_j = dsqrt(distance_j) - mu_ij = (distance_i - distance_j)*nucl_dist_inv(atom_number,j) + if(j==atom_number) cycle + + distance_j = (r(1) - nucl_coord_transp(1,j) ) * (r(1) - nucl_coord_transp(1,j)) + distance_j += (r(2) - nucl_coord_transp(2,j) ) * (r(2) - nucl_coord_transp(2,j)) + distance_j += (r(3) - nucl_coord_transp(3,j) ) * (r(3) - nucl_coord_transp(3,j)) + distance_j = dsqrt(distance_j) + + mu_ij = (distance_i - distance_j) * nucl_dist_inv(atom_number,j) nu_ij = mu_ij + slater_bragg_type_inter_distance_ua(atom_number,j) * (1.d0 - mu_ij*mu_ij) + cell_function_becke *= step_function_becke(nu_ij) enddo + + return end diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index 079cb388..9b91a8ed 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -59,7 +59,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] else - print*, 'j1b_type = ', j1b_pen, 'is not implemented for v_1b' + print*, 'j1b_type = ', j1b_type, 'is not implemented for v_1b' stop endif @@ -158,7 +158,7 @@ BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] else - print*, 'j1b_type = ', j1b_pen, 'is not implemented' + print*, 'j1b_type = ', j1b_type, 'is not implemented' stop endif From ae227aac33290e8689f8543d8698afda8af563a5 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 17 May 2023 01:21:22 +0200 Subject: [PATCH 28/29] todo: multiply F_TC by 0.5 --- src/tc_scf/rh_tcscf_diis.irp.f | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 5901911c..61355498 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -1,5 +1,10 @@ ! --- +! TODO +! level shift of SCF is well adapted +! for 0.5 x F +! + subroutine rh_tcscf_diis() implicit none From e3d8e28e23a7c6b19cb52b0aa8b2af0739207f19 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 18 May 2023 20:57:55 +0200 Subject: [PATCH 29/29] TC-SCF: no rotations for good angles --- src/tc_scf/rh_tcscf_diis.irp.f | 2 +- src/tc_scf/rh_tcscf_simple.irp.f | 2 +- src/tc_scf/routines_rotates.irp.f | 12 ++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 61355498..20260a95 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -236,7 +236,7 @@ subroutine rh_tcscf_diis() ! --- print *, ' TCSCF DIIS converged !' - !call print_energy_and_mos() + !call print_energy_and_mos(good_angles) call write_time(6) deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f index 30798e3d..0b79e8ea 100644 --- a/src/tc_scf/rh_tcscf_simple.irp.f +++ b/src/tc_scf/rh_tcscf_simple.irp.f @@ -119,7 +119,7 @@ subroutine rh_tcscf_simple() endif print *, ' TCSCF Simple converged !' - call print_energy_and_mos() + !call print_energy_and_mos(good_angles) deallocate(rho_old, rho_new) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 4ac66b5f..755c35b9 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -59,7 +59,7 @@ subroutine minimize_tc_orb_angles() good_angles = .False. thr_deg = thr_degen_tc - call print_energy_and_mos() + call print_energy_and_mos(good_angles) print *, ' Minimizing the angles between the TC orbitals' i = 1 @@ -78,7 +78,7 @@ subroutine minimize_tc_orb_angles() print *, ' Converged ANGLES MINIMIZATION !!' call print_angles_tc() - call print_energy_and_mos() + call print_energy_and_mos(good_angles) end @@ -386,10 +386,11 @@ end ! --- -subroutine print_energy_and_mos() +subroutine print_energy_and_mos(good_angles) implicit none - integer :: i + logical, intent(out) :: good_angles + integer :: i print *, ' ' print *, ' TC energy = ', TC_HF_energy @@ -398,10 +399,13 @@ subroutine print_energy_and_mos() if(max_angle_left_right .lt. 45.d0) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' + good_angles = .true. else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' + good_angles = .false. endif print *, ' Diag Fock elem, product of left/right norm, angle left/right '